setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f32 , Expand);
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f32 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
}
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f80 , Expand);
- setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setLibcallName(RTLIB::SRA_I128, 0);
}
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+ // For MacOSX, we don't want to the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- // Add the regs to the liveout set for the function.
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
- MRI.addLiveOut(RVLocs[i].getLocReg());
-
SDValue Flag;
-
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// The x86-64 ABIs require that for returning structs by value we copy
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
- MRI.addLiveOut(RetValReg);
+ RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
}
RetOps[0] = Chain; // Update chain.
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
- !isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == 3)
+ if (++NumInRegs == MaxInRegs)
return false;
break;
}
return SDValue();
}
+ LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
if (!isTypeLegal(NVT))
return SDValue();
// If it's foldable, i.e. normal load with single use, we will let code
// selection to fold it. Otherwise, we will short the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+ // In this case we should extract subvector from V.
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
}
return DAG.getNode(ISD::BITCAST, DL, VT,
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- LLVMContext *Context = DAG.getContext();
if (!Subtarget->hasSSE2())
return SDValue();
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
- DAG.getConstant(23, MVT::i32));
-
- const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
- Constant *C = ConstantDataVector::get(*Context, CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
- DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
Op.getOperand(1), Op.getOperand(2));
}
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+ CallingConv::C, /*isTaillCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
}
}
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (CondRHS.getConstantOperandVal(0) == -A-1) {
- SmallVector<SDValue, 32> V(VT.getVectorNumElements(),
- DAG.getConstant(-A, VT.getScalarType()));
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- V.data(), V.size()));
- }
+ DAG.getConstant(-A, VT));
}
// Another special case: If C was a sign bit, the sub has been
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegZize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
- SmallVector<SDValue, 8> C(NumElems,
- DAG.getConstant(Amt, RegVT.getScalarType()));
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size());
- Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV);
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
return DCI.CombineTo(N, Shuff, TF, true);
}
return SDValue();
}
-// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
+// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
+// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
+ // Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
- } else if (VT == MVT::i32) {
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
- } else if (VT == MVT::i64) {
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;