SDValue V2);
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-
+
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
+
if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
if (is64Bit) return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
return new X8632_ELFTargetObjectFile(TM);
} else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
return new TargetLoweringObjectFileCOFF();
- }
+ }
llvm_unreachable("unknown subtarget type");
}
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
- // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
- if (Subtarget->hasMMX() && !DisableMMX)
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom);
- else
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
}
}
// We may not have a libcall for MEMBARRIER so we should lower this.
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
-
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
- addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
-
- addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
-
- setOperationAction(ISD::ADD, MVT::v8i8, Legal);
- setOperationAction(ISD::ADD, MVT::v4i16, Legal);
- setOperationAction(ISD::ADD, MVT::v2i32, Legal);
- setOperationAction(ISD::ADD, MVT::v1i64, Legal);
-
- setOperationAction(ISD::SUB, MVT::v8i8, Legal);
- setOperationAction(ISD::SUB, MVT::v4i16, Legal);
- setOperationAction(ISD::SUB, MVT::v2i32, Legal);
- setOperationAction(ISD::SUB, MVT::v1i64, Legal);
-
- setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
- setOperationAction(ISD::MUL, MVT::v4i16, Legal);
-
- setOperationAction(ISD::AND, MVT::v8i8, Promote);
- AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v4i16, Promote);
- AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v2i32, Promote);
- AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v1i64, Legal);
-
- setOperationAction(ISD::OR, MVT::v8i8, Promote);
- AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v4i16, Promote);
- AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v2i32, Promote);
- AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v1i64, Legal);
-
- setOperationAction(ISD::XOR, MVT::v8i8, Promote);
- AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v4i16, Promote);
- AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v2i32, Promote);
- AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v1i64, Legal);
-
- setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
-
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
-
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
-
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
-
- setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
- setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
- setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
- setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
-
- if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
- setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
- }
- }
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+ // No operations on x86mmx supported, everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Expand);
if (!UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
-
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
- if (!DisableMMX && Subtarget->hasMMX()) {
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- }
}
if (Subtarget->hasSSE41()) {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
-
+
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
RRC = (Subtarget->is64Bit()
? X86::GR64RegisterClass : X86::GR32RegisterClass);
break;
- case MVT::v8i8: case MVT::v4i16:
- case MVT::v2i32: case MVT::v1i64:
+ case MVT::x86mmx:
RRC = X86::VR64RegisterClass;
break;
case MVT::f32: case MVT::f64:
#include "X86GenCallingConv.inc"
-bool
+bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
- // If this is x86-64, and we disabled SSE, we can't return FP values
- if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+ if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
-
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
}
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- assert(Reg &&
+ assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
return Ins[0].Flags.isSRet();
}
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
- NULL, 0, NULL, 0);
+ MachinePointerInfo(), MachinePointerInfo());
}
/// IsTailCallConvention - Return true if the calling convention is one that
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
}
}
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ else if (RegVT == MVT::x86mmx)
RC = X86::VR64RegisterClass;
else
llvm_unreachable("Unknown argument type!");
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector()) {
- ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- ArgValue, DAG.getConstant(0, MVT::i64));
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+ ArgValue);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
- ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
- false, false, 0);
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, 0);
InVals.push_back(ArgValue);
}
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall)) {
+ if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall))) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
static const unsigned GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegsWin64[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
- };
static const unsigned GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs, *XMMArgRegs;
+ const unsigned *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
if (IsWin64) {
- TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ TotalNumIntRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
- XMMArgRegs = XMMArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
- XMMArgRegs = XMMArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
- TotalNumXMMRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
// on the stack.
TotalNumXMMRegs = 0;
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
- FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(
- MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ if (IsWin64) {
+ const TargetFrameInfo &TFI = *getTargetMachine().getFrameInfo();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by deferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
false));
+ }
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(
- FuncInfo->getRegSaveFrameIndex()),
- Offset, false, false, 0);
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
FuncInfo->getVarArgsFPOffset()));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
X86::VR128RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal()) {
+ if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- }
+
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset,
+ MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
- OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
return Chain;
}
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
Arg = SpillSlot;
break;
// Store relative to framepointer.
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
+ Subtarget->getDarwinVers() < 9) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
}
}
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that. PR 8461.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
return true;
}
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
return DAG.getNode(Opc, dl, VT, V1, V2,
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return (Mask[0] < 2 && Mask[1] < 2);
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3) {
int i, e = VT.getVectorNumElements();
-
+
// Do not handle v2i64 / v2f64 shuffles with palignr.
if (e < 4 || !hasSSSE3)
return false;
-
+
for (i = 0; i != e; ++i)
if (Mask[i] >= 0)
break;
-
+
// All undef, not a palignr.
if (i == e)
return false;
bool NeedsUnary = false;
int s = Mask[i] - i;
-
+
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
- if (m < 0)
+ if (m < 0)
continue;
-
+
Unary = Unary && (m < (int)e);
NeedsUnary = NeedsUnary || (m < s);
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 4)
return false;
-
+
return isUndefOrEqual(N->getMaskElt(0), 2) &&
isUndefOrEqual(N->getMaskElt(1), 3) &&
isUndefOrEqual(N->getMaskElt(2), 2) &&
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (VT.getSizeInBits() == 64) { // MMX
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- } else if (VT.getSizeInBits() == 128) {
+ if (VT.getSizeInBits() == 128) { // SSE
if (HasSSE2) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
// type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 64) // MMX
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else // SSE
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- if (SV->getValueType(0).getVectorNumElements() <= 4)
- return SDValue(SV, 0);
-
EVT PVT = MVT::v4f32;
EVT VT = SV->getValueType(0);
DebugLoc dl = SV->getDebugLoc();
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- bool isMMX = VT.getSizeInBits() == 64;
- EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
SDValue
X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const {
-
+
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
// the shuffle mask.
int EltNo = (Offset - StartOffset) >> 2;
int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
// Canonicalize it to a v4i32 shuffle.
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
DAG.getVectorShuffle(MVT::v4i32, dl, V1,
- DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ DAG.getUNDEF(MVT::v4i32),&Mask[0]));
}
return SDValue();
}
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
-/// vector of type 'VT', see if the elements can be replaced by a single large
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
/// load which has the same value as a build_vector whose operands are 'elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-///
+///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- DebugLoc &dl, SelectionDAG &DAG) {
+ DebugLoc &DL, SelectionDAG &DAG) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
-
+
LoadSDNode *LDBase = NULL;
unsigned LastLoadedElt = -1U;
-
+
// For each element in the initializer, see if we've found a load or an undef.
- // If we don't find an initial load element, or later load elements are
+ // If we don't find an initial load element, or later load elements are
// non-consecutive, bail out.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
-
+
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+ Ops, 2, MVT::i32,
+ LDBase->getMemOperand());
+ return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode);
}
return SDValue();
}
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
(Op.getValueType().getSizeInBits() != 256 &&
ISD::isBuildVectorAllOnes(Op.getNode()))) {
- // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // Canonicalize this to <4 x i32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ if (Op.getValueType() == MVT::v4i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
- // Handle MMX and SSE both.
- EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
- unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ EVT MiddleVT = MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
-
+
// Check for elements which are consecutive loads.
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
if (LD.getNode())
return LD;
-
- // For SSE 4.1, use insertps to put the high elements into the low element.
+
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
-
+
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
}
return Result;
}
-
+
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
EltStride == NumElems/2)
continue;
-
+
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
}
EltStride >>= 1;
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const TargetLowering &TLI, DebugLoc dl) {
+ SelectionDAG &DAG, DebugLoc dl) {
EVT VT = SVOp->getValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
- EVT NewVT = MaskVT;
+ EVT NewVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v16i8: NewVT = MVT::v4i32; break;
}
- if (NewWidth == 2) {
- if (VT.isInteger())
- NewVT = MVT::v2i64;
- else
- NewVT = MVT::v2f64;
- }
int Scale = NumElems / NewWidth;
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
return false;
}
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug get fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (ISD::isNormalLoad(V.getNode()))
+ return true;
+ return false;
+}
+
+/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
+/// a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization not done.
+/// FIXME: This is probably not the best approach, but fix the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+ ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+ // Be sure that the vector shuffle is present in a pattern like this:
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+ if (!V.hasOneUse())
+ return false;
+
+ SDNode *N = *V.getNode()->use_begin();
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ SDValue EltNo = N->getOperand(1);
+ if (!isa<ConstantSDNode>(EltNo))
+ return false;
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ bool HasShuffleIntoBitcast = false;
+ if (V.getOpcode() == ISD::BIT_CONVERT) {
+ EVT SrcVT = V.getOperand(0).getValueType();
+ if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+ return false;
+ V = V.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+ V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+ // Skip one more bit_convert if necessary
+ if (V.getOpcode() == ISD::BIT_CONVERT)
+ V = V.getOperand(0);
+
+ if (ISD::isNormalLoad(V.getNode())) {
+ // Is the original load suitable?
+ LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+ // FIXME: avoid the multi-use bug that is preventing lots of
+ // of foldings to be detected, this is still wrong of course, but
+ // give the temporary desired behavior, and if it happens that
+ // the load has real more uses, during isel it will not fold, and
+ // will generate poor code.
+ if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+ return false;
+
+ if (!HasShuffleIntoBitcast)
+ return true;
+
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return false;
+ }
+
+ return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Canonizalize to v2f64.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
return 0;
}
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const X86Subtarget *Subtarget) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- unsigned NumElems = VT.getVectorNumElements();
- bool isMMX = VT.getSizeInBits() == 64;
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
- bool V1IsSplat = false;
- bool V2IsSplat = false;
- bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
- MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
- // Promote splats to v4f32.
+ // Handle splat operations
if (SVOp->isSplat()) {
- if (isMMX || NumElems < 4)
+ // Special case, this is the only place now where it's
+ // allowed to return a vector_shuffle operation without
+ // using a target specific node, because *hopefully* it
+ // will be optimized away by the dag combiner.
+ if (VT.getVectorNumElements() <= 4 &&
+ CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
+
+ // Handle splats by matching through known masks
+ if (VT.getVectorNumElements() <= 4)
+ return SDValue();
+
+ // Canonicalize all of the remaining to v4f32.
return PromoteSplat(SVOp, DAG);
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode()) {
if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
DAG, Subtarget, dl);
}
}
+ return SDValue();
+}
- if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) {
- // NOTE: isPSHUFDMask can also match this mask, if speed is more
- // important than size here, this will be matched by pshufd
- if (VT == MVT::v4f32)
- return getTargetShuffleNode(X86ISD::UNPCKLPS, dl, VT, V1, V1, DAG);
- if (HasSSE2 && VT == MVT::v16i8)
- return getTargetShuffleNode(X86ISD::PUNPCKLBW, dl, VT, V1, V1, DAG);
- if (HasSSE2 && VT == MVT::v8i16)
- return getTargetShuffleNode(X86ISD::PUNPCKLWD, dl, VT, V1, V1, DAG);
- if (HasSSE2 && VT == MVT::v4i32)
- return getTargetShuffleNode(X86ISD::PUNPCKLDQ, dl, VT, V1, V1, DAG);
- }
-
- if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) {
- // NOTE: isPSHUFDMask can also match this mask, if speed is more
- // important than size here, this will be matched by pshufd
- if (VT == MVT::v4f32)
- return getTargetShuffleNode(X86ISD::UNPCKHPS, dl, VT, V1, V1, DAG);
- if (HasSSE2 && VT == MVT::v16i8)
- return getTargetShuffleNode(X86ISD::PUNPCKHBW, dl, VT, V1, V1, DAG);
- if (HasSSE2 && VT == MVT::v8i16)
- return getTargetShuffleNode(X86ISD::PUNPCKHWD, dl, VT, V1, V1, DAG);
- if (HasSSE2 && VT == MVT::v4i32)
- return getTargetShuffleNode(X86ISD::PUNPCKHDQ, dl, VT, V1, V1, DAG);
- }
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+ // Shuffle operations on MMX not supported.
+ if (isMMX)
+ return Op;
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+ // The general ideia is that no vector_shuffle operation should be left to
+ // be matched during isel, all of them must be converted to a target specific
+ // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+ // doesn't include all of those, work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ if (NewOp.getNode())
+ return NewOp;
+
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+ RelaxedMayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ // Use to match splats
+ if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+ if (!X86::isMOVLPMask(SVOp)) {
if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
}
// FIXME: fold these into legal mask.
- if (!isMMX) {
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
- if (X86::isMOVHLPSMask(SVOp))
- return getMOVHighToLow(Op, dl, DAG);
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
- if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasSSE2);
- }
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
}
if (X86::isUNPCKLMask(SVOp))
- return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
- return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
- return (isMMX) ?
- NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
- return (isMMX) ?
- NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
}
- // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
-
// Normalize the node to match x86 shuffle ops if needed
- if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
SmallVector<int, 16> M;
SVOp->getMask(M);
- // Very little shuffling can be done for 64-bit vectors right now.
- if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+ if (isPALIGNRMask(M, VT, HasSSSE3))
+ return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ X86::getShufflePALIGNRImmediate(SVOp),
+ DAG);
- // FIXME: pshufb, blends, shifts.
- if (VT.getVectorNumElements() == 2 ||
- ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT) ||
- isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT) ||
- isPSHUFLWMask(M, VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
- isUNPCKLMask(M, VT) ||
- isUNPCKHMask(M, VT) ||
- isUNPCKL_v_undef_Mask(M, VT) ||
- isUNPCKH_v_undef_Mask(M, VT))
- return Op;
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ }
+
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT)) {
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (VT == MVT::v4f32 || VT == MVT::v4i32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+ TargetMask, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+ TargetMask, DAG);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
return NewOp;
}
- // Handle all 4 wide cases with a number of shuffles except for MMX.
- if (NumElems == 4 && !isMMX)
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4)
return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
return SDValue();
unsigned Opc;
if (VT == MVT::v8i16)
Opc = X86ISD::PINSRW;
- else if (VT == MVT::v4i16)
- Opc = X86ISD::MMX_PINSRW;
else if (VT == MVT::v16i8)
Opc = X86ISD::PINSRB;
else
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
- dl, VT, N0, N1, N2);
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
}
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
-
+
if (Op.getValueType() == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- EVT VT = MVT::v2i32;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: break;
- case MVT::v16i8:
- case MVT::v8i16:
- VT = MVT::v4i32;
- break;
- }
+ assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+ "Expected an SSE type!");
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
const EVT PtrVT, TLSModel::Model model,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
- // Get the Thread Pointer
- SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
- DebugLoc(), PtrVT,
- DAG.getRegister(is64Bit? X86::FS : X86::GS,
- MVT::i32));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
- NULL, 0, false, false, 0);
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
-
+
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
-
+
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
-
- TLSModel::Model model
+
+ TLSModel::Model model
= getTLSModel(GV, getTargetMachine().getRelocationModel());
-
+
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
- DebugLoc DL = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
getPointerTy(),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
+
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
Offset);
-
+
// Lowering the machine isd will make sure everything is in the right
// location.
SDValue Args[] = { Offset };
SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
-
+
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setAdjustsStack(true);
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
}
-
+
assert(false &&
"TLS not implemented for this target.");
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
- if (SrcVT.isVector()) {
- if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
- return Op;
- }
+ if (SrcVT.isVector())
return SDValue();
- }
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
- SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
- Tys, Ops, array_lengthof(Ops));
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
- Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
}
DAG.getIntPtrConstant(0)));
SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0, false, false, 0);
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
- SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
- FudgePtr, PseudoSourceValue::getConstantPool(),
- 0, MVT::f32, false, false, 4);
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
unsigned Opc;
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
- if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+ EVT TheVT = Op.getOperand(0).getValueType();
+ if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
- Chain = DAG.getStore(Chain, dl, Value, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
- Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+ Chain, StackSlot, DAG.getValueType(TheVT)
};
- Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v2i32 &&
- Op.getOperand(0).getValueType() == MVT::v2f64) {
- return Op;
- }
+ if (Op.getValueType().isVector())
return SDValue();
- }
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0, false, false, 0);
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0, false, false, 0);
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFABS(SDValue Op,
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
if (VT.isVector()) {
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
switch (VT.getSimpleVT().SimpleTy) {
default: break;
- case MVT::v8i8:
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
- case MVT::v4i16:
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
- case MVT::v2i32:
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert(Subtarget->isTargetCygMing() &&
- "This should be used only on Cygwin/Mingw targets");
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+ "This should be used only on Windows targets");
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
Flag = Chain.getValue(1);
Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- if (!Subtarget->is64Bit()) {
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
}
// __va_list_tag:
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
- SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
MVT::i32),
- FIN, SV, 0, false, false, 0);
+ FIN, MachinePointerInfo(SV), false, false, 0);
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
- Store = DAG.getStore(Op.getOperand(0), dl,
+ Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
- FIN, SV, 4, false, false, 0);
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
- false, false, 0);
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOps[0], MemOps.size());
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
- assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
- report_fatal_error("VAArgInst is not yet implemented for x86-64!");
- return SDValue();
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasSSE1());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
}
SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false, DstSV, 0, SrcSV, 0);
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
SDValue
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
} else {
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+// FIXME this must be lowered to get rid of the invalid type.
}
EVT VT = Op.getValueType();
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0, false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
false, false, 0);
return FrameAddr;
}
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
DAG.getIntPtrConstant(TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
- Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
MF.getRegInfo().addLiveOut(StoreAddrReg);
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 0, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, MVT::i64));
- OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
false, false, 2);
// Load the 'nest' parameter value into R10.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 10, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, MVT::i64));
- OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
false, false, 2);
// Jump to the nested function.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 20, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
- TrmpAddr, 22, false, false, 0);
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
SDValue Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
- Trmp, TrmpAddr, 0, false, false, 0);
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, MVT::i32));
- OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
false, false, 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
- TrmpAddr, 5, false, false, 1);
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, MVT::i32));
- OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
SDValue Ops[] =
const TargetFrameInfo &TFI = *TM.getFrameInfo();
unsigned StackAlignment = TFI.getStackAlignment();
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
- SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
- DAG.getEntryNode(), StackSlot);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
// Load FP Control Word from stack slot
- SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// Transform as necessary
SDValue CWD1 =
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x800, MVT::i16)),
DAG.getConstant(11, MVT::i8));
SDValue CWD2 =
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x400, MVT::i16)),
DAG.getConstant(9, MVT::i8));
SDValue RetVal =
- DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-
+
std::vector<Constant*> CV(4, CI);
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
Constant *C = ConstantVector::get(CVM1);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
// r = pblendv(r, psllw(r & (char16)15, 4), a);
R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
C = ConstantVector::get(CVM2);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false, 16);
-
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
// r = pblendv(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, M);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
// return pblendv(r, r+r, a);
R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
-
+
if (!Subtarget->hasSSE2()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
+ SDValue Zero = DAG.getConstant(0,
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
Zero,
Chain
};
- SDNode *Res =
+ SDNode *Res =
DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
array_lengthof(Ops));
return SDValue(Res, 0);
}
-
+
unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
if (!isDev)
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-
+
unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
+
// def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
if (!Op1 && !Op2 && !Op3 && Op4)
return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-
+
// def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
if (Op1 && !Op2 && !Op3 && !Op4)
return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-
- // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
// (MFENCE)>;
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
Reg = X86::RAX; size = 8;
break;
}
- SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
DAG.getTargetConstant(size, MVT::i8),
cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
SDValue cpOut =
- DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
return cpOut;
}
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && !DisableMMX) &&
"Unexpected custom BIT_CONVERT");
- assert((DstVT == MVT::i64 ||
+ assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
"Unexpected custom BIT_CONVERT");
// i64 <=> MMX conversions are Legal.
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
- false, false, 0));
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(), false, false, 0));
}
return;
}
N->getOperand(1),
swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ Ops, 3, T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
MVT::i32, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
- case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
- case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
- case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
}
}
return BB;
}
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+ /* Align ArgSize to a multiple of 8 */
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ // | .
+ // | .
+ // offsetMBB overflowMBB
+ // | .
+ // | .
+ // endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
- MachineMemOperand::MOStore, Offset,
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
.addFrameIndex(RegSaveFrameIndex)
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
+ .addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
- const X86InstrInfo *TII
+ const X86InstrInfo *TII
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
- bool IsWin64 = Subtarget->isTargetWin64();
-
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
-
+
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
}
-
+
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
- case X86::MINGW_ALLOCA:
- return EmitLoweredMingwAlloca(MI, BB);
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
- case X86::CMOV_V1I64:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
false);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
}
}
}
}
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
- 0, false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
- OffsetVal, StackPtr);
+ StackPtr, OffsetVal);
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
- ScalarAddr, NULL, 0, false, false, 0);
+ ScalarAddr, MachinePointerInfo(),
+ false, false, 0);
// Replace the exact with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
- SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
- Ld->getBasePtr(), Ld->getSrcValue(),
- Ld->getSrcValueOffset(), Ld->isVolatile(),
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.size());
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
- St->getSrcValue(), St->getSrcValueOffset(),
+ St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
}
DAG.getConstant(4, MVT::i32));
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
MinAlign(Ld->getAlignment(), 4));
DAG.getConstant(4, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
- St->getSrcValue(), St->getSrcValueOffset(),
+ St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
- St->getSrcValue(),
- St->getSrcValueOffset() + 4,
+ St->getPointerInfo().getWithOffset(4),
St->isVolatile(),
St->isNonTemporal(),
MinAlign(St->getAlignment(), 4));
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
std::string AsmStr = IA->getAsmString();
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'A':
- return C_Register;
- case 'f':
- case 'r':
case 'R':
- case 'l':
case 'q':
case 'Q':
- case 'x':
+ case 'f':
+ case 't':
+ case 'u':
case 'y':
+ case 'x':
case 'Y':
return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
case 'e':
case 'Z':
return C_Other;
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && !DisableMMX && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1())
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.