setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f32 , Expand);
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f32 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
}
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f80 , Expand);
- setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setLibcallName(RTLIB::SRA_I128, 0);
}
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+ // For MacOSX, we don't want to the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- // Add the regs to the liveout set for the function.
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
- MRI.addLiveOut(RVLocs[i].getLocReg());
-
SDValue Flag;
-
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax.
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
if (Subtarget->is64Bit() &&
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
- // RAX now acts like a return value.
- MRI.addLiveOut(X86::RAX);
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
}
RetOps[0] = Chain; // Update chain.
InVals.push_back(ArgValue);
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. Save the argument into
- // a virtual register so that we can access it from the return points.
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
- return DAG.getNode(X86ISD::TC_RETURN, dl,
- NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
- !isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == 3)
+ if (++NumInRegs == MaxInRegs)
return false;
break;
}
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->isNullValue()) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+ return CN->isNullValue();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+ return CFP->getValueAPF().isPosZero();
+ return false;
}
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
break;
+ case X86ISD::PALIGNR:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
case X86ISD::MOVLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
// Check the mask for BLEND and build the value.
unsigned MaskValue = 0;
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumLanes = (NumElems-1)/8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
// Blend for v16i16 should be symetric for the both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
- int SndLaneEltIdx = (NumLanes == 2) ?
+ int SndLaneEltIdx = (NumLanes == 2) ?
SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if ((EltIdx == -1 || EltIdx == (int)i) &&
- (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
continue;
- if (((unsigned)EltIdx == (i + NumElems)) &&
- (SndLaneEltIdx == -1 ||
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx < 0 ||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
MaskValue |= (1<<i);
- else
+ else
return SDValue();
}
// Convert i32 vectors to floating point if it is not AVX2.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
- EVT BlendVT = VT;
+ MVT BlendVT = VT;
if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
- BlendVT = EVT::getVectorVT(*DAG.getContext(),
- EVT::getFloatingPointVT(EltVT.getSizeInBits()),
- NumElems);
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
}
-
- SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
- DAG.getConstant(MaskValue, MVT::i32));
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
}
}
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
return SDValue();
}
+ LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
if (!isTypeLegal(NVT))
return SDValue();
// If it's foldable, i.e. normal load with single use, we will let code
// selection to fold it. Otherwise, we will short the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+ // In this case we should extract subvector from V.
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
}
return DAG.getNode(ISD::BITCAST, DL, VT,
// Handle splat operations
if (SVOp->isSplat()) {
- unsigned NumElem = VT.getVectorNumElements();
-
// Use vbroadcast whenever the splat comes from a foldable load
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
-
- // Handle splats by matching through known shuffle masks
- if ((VT.is128BitVector() && NumElem <= 4) ||
- (VT.is256BitVector() && NumElem <= 8))
- return SDValue();
-
- // All remaning splats are promoted to target supported vector shuffles.
- return PromoteSplat(SVOp, DAG);
}
// Check integer expanding shuffles.
// nodes, and remove one by one until they don't return Op anymore.
if (isPALIGNRMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
- int64_t Offset,
- SelectionDAG &DAG) const {
+ int64_t Offset, SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
-std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
+std::pair<SDValue,SDValue>
+X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, ~(1ULL << 63))));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 1ULL << 63)));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
// First get the sign bit of second operand.
SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- LLVMContext *Context = DAG.getContext();
if (!Subtarget->hasSSE2())
return SDValue();
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
- DAG.getConstant(23, MVT::i32));
-
- const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
- Constant *C = ConstantDataVector::get(*Context, CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
- DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
Op.getOperand(1), Op.getOperand(2));
}
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+ CallingConv::C, /*isTaillCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
}
}
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (CondRHS.getConstantOperandVal(0) == -A-1) {
- SmallVector<SDValue, 32> V(VT.getVectorNumElements(),
- DAG.getConstant(-A, VT.getScalarType()));
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- V.data(), V.size()));
- }
+ DAG.getConstant(-A, VT));
}
// Another special case: If C was a sign bit, the sub has been
ConstantSDNode *CmpAgainst = 0;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
- dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+ !isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end();
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegZize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
- SmallVector<SDValue, 8> C(NumElems,
- DAG.getConstant(Amt, RegVT.getScalarType()));
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size());
- Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV);
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
return DCI.CombineTo(N, Shuff, TF, true);
}
return SDValue();
}
-// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
+// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
+// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
+ // Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
- } else if (VT == MVT::i32) {
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
- } else if (VT == MVT::i64) {
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;