X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=478bf71c686c6d7b732cc94e7a36846a13de90ba;hb=1292c226458b68a119d3a387a0527f453b2065c2;hp=f698ceceac52f3ea6bdf1da47887e29863fdc860;hpb=d155d7e428e0e14b520dddbf00b6745d7eb2d7b8;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f698ceceac5..478bf71c686 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -104,8 +104,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); } if (Subtarget->isTargetDarwin()) { @@ -226,12 +226,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // TODO: when we have SSE, these could be more efficient, by using movd/movq. if (!X86ScalarSSEf64) { - setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); - setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + setOperationAction(ISD::BITCAST , MVT::f32 , Expand); + setOperationAction(ISD::BITCAST , MVT::i32 , Expand); if (Subtarget->is64Bit()) { - setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); + setOperationAction(ISD::BITCAST , MVT::f64 , Expand); // Without SSE, i64->f64 goes through memory. - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } } @@ -285,21 +285,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTPOP , MVT::i16 , Expand); setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); if (Subtarget->is64Bit()) { - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Custom); setOperationAction(ISD::CTLZ , MVT::i64 , Custom); } + if (Subtarget->hasPOPCNT()) { + setOperationAction(ISD::CTPOP , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); + setOperationAction(ISD::CTPOP , MVT::i16 , Expand); + setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + } + setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); setOperationAction(ISD::BSWAP , MVT::i16 , Expand); @@ -521,13 +527,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); { - bool ignored; - APFloat TmpFlt(+0.0); - TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, - &ignored); + APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended); addLegalFPImmediate(TmpFlt); // FLD0 TmpFlt.changeSign(); addLegalFPImmediate(TmpFlt); // FLD0/FCHS + + bool ignored; APFloat TmpFlt2(+1.0); TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, &ignored); @@ -654,10 +659,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i16, Expand); setOperationAction(ISD::SELECT, MVT::v2i32, Expand); setOperationAction(ISD::SELECT, MVT::v1i64, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Expand); + setOperationAction(ISD::BITCAST, MVT::v8i8, Expand); + setOperationAction(ISD::BITCAST, MVT::v4i16, Expand); + setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); + setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); if (!UseSoftFloat && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); @@ -1102,16 +1107,6 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } -/// getPICBaseSymbol - Return the X86-32 PIC base. -MCSymbol * -X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF, - MCContext &Ctx) const { - const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo(); - return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ - Twine(MF->getFunctionNumber())+"$pb"); -} - - const MCExpr * X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, @@ -1146,7 +1141,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. - return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx); + return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); } /// getFunctionAlignment - Return the Log2 alignment of this function. @@ -1182,7 +1177,9 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ unsigned X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0; + const TargetFrameInfo *TFI = MF.getTarget().getFrameInfo(); + + unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; switch (RC->getID()) { default: return 0; @@ -1301,13 +1298,13 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (Subtarget->is64Bit()) { if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); + ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy); + ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); } } } @@ -1346,6 +1343,28 @@ X86TargetLowering::LowerReturn(SDValue Chain, MVT::Other, &RetOps[0], RetOps.size()); } +bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { + if (N->getNumValues() != 1) + return false; + if (!N->hasNUsesOfValue(1, 0)) + return false; + + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg && + Copy->getOpcode() != ISD::FP_EXTEND) + return false; + + bool HasRet = false; + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != X86ISD::RET_FLAG) + return false; + HasRet = true; + } + + return HasRet; +} + /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// @@ -1414,7 +1433,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, MVT::i64, InFlag).getValue(1); Val = Chain.getValue(0); } - Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val); + Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val); } else { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag).getValue(1); @@ -1457,30 +1476,6 @@ ArgsAreStructReturn(const SmallVectorImpl &Ins) { return Ins[0].Flags.isSRet(); } -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { - if (Subtarget->is64Bit()) { - if (CC == CallingConv::GHC) - return CC_X86_64_GHC; - else if (Subtarget->isTargetWin64()) - return CC_X86_Win64_C; - else - return CC_X86_64_C; - } - - if (CC == CallingConv::X86_FastCall) - return CC_X86_32_FastCall; - else if (CC == CallingConv::X86_ThisCall) - return CC_X86_32_ThisCall; - else if (CC == CallingConv::Fast) - return CC_X86_32_FastCC; - else if (CC == CallingConv::GHC) - return CC_X86_32_GHC; - else - return CC_X86_32_C; -} - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1576,7 +1571,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); + CCInfo.AnalyzeFormalArguments(Ins, CC_X86); unsigned LastVal = ~0U; SDValue ArgValue; @@ -1621,7 +1616,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::BCvt) - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. @@ -1895,7 +1890,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); + CCInfo.AnalyzeCallOperands(Outs, CC_X86); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1954,14 +1949,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CCValAssign::AExt: if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { // Special case: passing MMX values in XMM registers. - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); } else Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg); break; case CCValAssign::Indirect: { // Store the argument. @@ -2174,8 +2169,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; - // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external - // symbols should go through the PLT. + // On ELF targets, in either X86-64 or X86-32 mode, direct calls to + // external symbols should go through the PLT. if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; @@ -2478,7 +2473,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + CCInfo.AnalyzeCallOperands(Outs, CC_X86); if (CCInfo.getNextStackOffset()) { MachineFunction &MF = DAG.getMachineFunction(); if (MF.getInfo()->getBytesToPopOnReturn()) @@ -2533,7 +2528,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } // An stdcall caller is expected to clean up its arguments; the callee - // isn't going to do that. PR 8461. + // isn't going to do that. if (!CCMatch && CallerCC==CallingConv::X86_StdCall) return false; @@ -3533,7 +3528,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } /// getOnesVector - Returns a vector of specified type with all bits set. @@ -3546,7 +3541,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -3631,9 +3626,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Perform the splat. int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); + V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1); V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1); + return DAG.getNode(ISD::BITCAST, dl, VT, V1); } /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified @@ -3757,7 +3752,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, } // Actual nodes that may contain scalar elements - if (Opcode == ISD::BIT_CONVERT) { + if (Opcode == ISD::BITCAST) { V = V.getOperand(0); EVT SrcVT = V.getValueType(); unsigned NumElems = VT.getVectorNumElements(); @@ -3946,7 +3941,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } } - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V); } /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. @@ -3987,8 +3982,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, const TargetLowering &TLI, DebugLoc dl) { EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; - SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); } @@ -4055,8 +4050,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, LD->getPointerInfo().getWithOffset(StartOffset), false, false, 0); // Canonicalize it to a v4i32 shuffle. - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getVectorShuffle(MVT::v4i32, dl, V1, DAG.getUNDEF(MVT::v4i32),&Mask[0])); } @@ -4124,7 +4119,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i32, LDBase->getMemOperand()); - return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode); + return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); } @@ -4216,7 +4211,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG.getUNDEF(Item.getValueType()), &Mask[0]); } - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item); } } @@ -4240,7 +4235,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item); + return DAG.getNode(ISD::BITCAST, dl, VT, Item); } } @@ -4433,21 +4428,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || ResVT == MVT::v8i16 || ResVT == MVT::v16i8); int Mask[2]; - SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0)); + SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0)); SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); InVec = Op.getOperand(1); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { unsigned NumElts = ResVT.getVectorNumElements(); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); } else { - InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec); + InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec); SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); Mask[0] = 0; Mask[1] = 2; VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); } - return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -4529,9 +4524,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); NewV = DAG.getVectorShuffle(MVT::v2i64, dl, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); - NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1), + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]); + NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV); // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the // source words for the shuffle, to aid later transformations. @@ -4600,12 +4595,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8)); } - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1); + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1); V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); if (!TwoInputs) - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); + return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); // Calculate the shuffle mask for the second input, shuffle it, and // OR it with the first shuffled input. @@ -4620,12 +4615,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8)); pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8)); } - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2); + V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2); V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); + return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); } // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order, @@ -4792,8 +4787,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, // No SSSE3 - Calculate in place words and then fix all out of place words // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from // the 16 different words that comprise the two doublequadword input vectors. - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2); + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); SDValue NewV = V2Only ? V2 : V1; for (int i = 0; i != 8; ++i) { int Elt0 = MaskVals[i*2]; @@ -4855,7 +4850,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt, DAG.getIntPtrConstant(i)); } - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV); } /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide @@ -4899,8 +4894,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, MaskVec.push_back(StartIdx / Scale); } - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1); - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2); + V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } @@ -4917,13 +4912,13 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, // movssrr and movsdrr do not clear top bits. Try to use movd, movq // instead. MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; - if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) && + if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) && SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR && - SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT && + SrcOp.getOperand(0).getOpcode() == ISD::BITCAST && SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) { // PR2108 OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32; - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OpVT, @@ -4933,9 +4928,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, } } - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, OpVT, SrcOp))); } @@ -5089,7 +5084,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } static bool MayFoldVectorLoad(SDValue V) { - if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT) + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); @@ -5106,7 +5101,7 @@ static bool MayFoldVectorLoad(SDValue V) { // one use. Remove this version after this bug get fixed. // rdar://8434668, PR8156 static bool RelaxedMayFoldVectorLoad(SDValue V) { - if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT) + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); @@ -5144,7 +5139,7 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, // If the bit convert changed the number of elements, it is unsafe // to examine the mask. bool HasShuffleIntoBitcast = false; - if (V.getOpcode() == ISD::BIT_CONVERT) { + if (V.getOpcode() == ISD::BITCAST) { EVT SrcVT = V.getOperand(0).getValueType(); if (SrcVT.getVectorNumElements() != VT.getVectorNumElements()) return false; @@ -5159,7 +5154,7 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); // Skip one more bit_convert if necessary - if (V.getOpcode() == ISD::BIT_CONVERT) + if (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); if (ISD::isNormalLoad(V.getNode())) { @@ -5196,8 +5191,8 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { EVT VT = Op.getValueType(); // Canonizalize to v2f64. - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); + return DAG.getNode(ISD::BITCAST, dl, VT, getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64, V1, DAG)); } @@ -5351,7 +5346,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, if (VT == MVT::v8i16 || VT == MVT::v16i8) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, NewOp); + return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. @@ -5661,7 +5656,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), Op.getOperand(1))); @@ -5682,14 +5677,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if ((User->getOpcode() != ISD::STORE || (isa(Op.getOperand(1)) && cast(Op.getOperand(1))->isNullValue())) && - (User->getOpcode() != ISD::BIT_CONVERT || + (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), Op.getOperand(1)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); } else if (VT == MVT::i32) { // ExtractPS works with constant index. if (isa(Op.getOperand(1))) @@ -5720,7 +5715,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. @@ -5851,7 +5846,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 && "Expected an SSE type!"); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } @@ -5918,12 +5913,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (OpFlag) { + if (OpFlag) Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()), Result); - } return Result; } @@ -6423,7 +6417,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); - SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); + SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(), false, false, 16); @@ -6453,19 +6447,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, DAG.getIntPtrConstant(0))); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load), + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), DAG.getIntPtrConstant(0)); // Or the load with the bias. SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or), + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or), DAG.getIntPtrConstant(0)); // Subtract the bias. @@ -6723,11 +6717,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, 16); if (VT.isVector()) { - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op.getOperand(0)), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask))); + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask))); } else { return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } @@ -6779,7 +6773,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit); SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit, DAG.getConstant(32, MVT::i32)); - SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit); + SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit); SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit, DAG.getIntPtrConstant(0)); } @@ -7928,7 +7922,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const } EVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt); + ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); @@ -8362,7 +8356,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { false, false, 16); Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); } @@ -8583,16 +8577,16 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, +SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && Subtarget->hasMMX() && !DisableMMX) && - "Unexpected custom BIT_CONVERT"); + "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && - "Unexpected custom BIT_CONVERT"); + "Unexpected custom BITCAST"); // i64 <=> MMX conversions are Legal. if (SrcVT==MVT::i64 && DstVT.isVector()) return Op; @@ -8675,7 +8669,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); - case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); + case ISD::BITCAST: return LowerBITCAST(Op, DAG); } } @@ -9438,15 +9432,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned Opc; - if (!Subtarget->hasAVX()) { if (memArg) Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; @@ -9459,20 +9450,59 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; } - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); - + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); for (unsigned i = 0; i < numArgs; ++i) { MachineOperand &Op = MI->getOperand(i+1); - if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } - - BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); + return BB; +} +MachineBasicBlock * +X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // Address into RAX/EAX, other two args into ECX, EDX. + unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; + unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); + for (int i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(i)); + + unsigned ValOps = X86::AddrNumOperands; + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(ValOps).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX) + .addReg(MI->getOperand(ValOps+1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr)); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // First arg in ECX, the second in EAX. + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(0).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) + .addReg(MI->getOperand(1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); + + MI->eraseFromParent(); // The pseudo is gone now. return BB; } @@ -10075,6 +10105,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128MEM: return EmitPCMP(MI, BB, 5, true /* in mem */); + // Thread synchronization. + case X86::MONITOR: + return EmitMonitor(MI, BB); + case X86::MWAIT: + return EmitMwait(MI, BB); + // Atomic Lowering. case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, @@ -11210,13 +11246,13 @@ static SDValue PerformBTCombine(SDNode *N, static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); - if (Op.getOpcode() == ISD::BIT_CONVERT) + if (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); EVT VT = N->getValueType(0), OpVT = Op.getValueType(); if (Op.getOpcode() == X86ISD::VZEXT_LOAD && VT.getVectorElementType().getSizeInBits() == OpVT.getVectorElementType().getSizeInBits()) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } return SDValue(); } @@ -11428,13 +11464,13 @@ static bool LowerToBSwap(CallInst *CI) { bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { InlineAsm *IA = cast(CI->getCalledValue()); - std::vector Constraints = IA->ParseConstraints(); + InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); std::string AsmStr = IA->getAsmString(); // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" SmallVector AsmPieces; - SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? + SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; @@ -11475,6 +11511,35 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: + if (CI->getType()->isIntegerTy(32) && + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + SmallVector Words; + SplitString(AsmPieces[0], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && + Words[2] == "${0:w}") { + Words.clear(); + SplitString(AsmPieces[1], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" && + Words[2] == "$0") { + Words.clear(); + SplitString(AsmPieces[2], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && + Words[2] == "${0:w}") { + AsmPieces.clear(); + const std::string &Constraints = IA->getConstraintString(); + SplitString(StringRef(Constraints).substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } + } + } + } + } if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && @@ -11508,18 +11573,32 @@ X86TargetLowering::ConstraintType X86TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { - case 'A': - return C_Register; - case 'f': - case 'r': case 'R': - case 'l': case 'q': case 'Q': - case 'x': + case 'f': + case 't': + case 'u': case 'y': + case 'x': case 'Y': return C_RegisterClass; + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + case 'A': + return C_Register; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'G': + case 'C': case 'e': case 'Z': return C_Other; @@ -11530,30 +11609,106 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } -/// Examine constraint type and operand type and determine a weight value, -/// where: -1 = invalid match, and 0 = so-so match to 3 = good match. +/// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. -int X86TargetLowering::getSingleConstraintMatchWeight( +TargetLowering::ConstraintWeight + X86TargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { - int weight = -1; + ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (CallOperandVal == NULL) - return 0; + return CW_Default; + const Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: - return TargetLowering::getSingleConstraintMatchWeight(info, constraint); + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + case 'R': + case 'q': + case 'Q': + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + case 'A': + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_SpecificReg; + break; + case 'f': + case 't': + case 'u': + if (type->isFloatingPointTy()) + weight = CW_SpecificReg; + break; + case 'y': + if (type->isX86_MMXTy() && !DisableMMX && Subtarget->hasMMX()) + weight = CW_SpecificReg; + break; + case 'x': + case 'Y': + if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) + weight = CW_Register; break; case 'I': if (ConstantInt *C = dyn_cast(info.CallOperandVal)) { if (C->getZExtValue() <= 31) - weight = 3; + weight = CW_Constant; + } + break; + case 'J': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if (C->getZExtValue() <= 63) + weight = CW_Constant; + } + break; + case 'K': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) + weight = CW_Constant; + } + break; + case 'L': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) + weight = CW_Constant; + } + break; + case 'M': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if (C->getZExtValue() <= 3) + weight = CW_Constant; + } + break; + case 'N': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if (C->getZExtValue() <= 0xff) + weight = CW_Constant; + } + break; + case 'G': + case 'C': + if (dyn_cast(CallOperandVal)) { + weight = CW_Constant; + } + break; + case 'e': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if ((C->getSExtValue() >= -0x80000000LL) && + (C->getSExtValue() <= 0x7fffffffLL)) + weight = CW_Constant; + } + break; + case 'Z': + if (ConstantInt *C = dyn_cast(CallOperandVal)) { + if (C->getZExtValue() <= 0xffffffff) + weight = CW_Constant; } break; - // etc. } return weight; }