X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=bdb7ea6869cd845ad4667e3a06f7e4b2aac77a5d;hb=23d1d5eb566dbd10a81d9ce2dc67ad1548110b08;hp=84c5ff2c37d2cd82368d157042c67ffce76640a2;hpb=0ee17006b1b65204ab95360b98d04304bf206c59;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 84c5ff2c37d..bdb7ea6869c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -526,16 +526,8 @@ void X86TargetLowering::resetOperationActions() { if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); - // On X86 and X86-64, atomic operations are lowered to locked instructions. - // Locked instructions, in turn, have implicit fence semantics (all memory - // operations are flushed before issuing the locked instruction, and they - // are not buffered), so we can fold away the common pattern of - // fence-atomic-fence. - setShouldFoldAtomicFences(true); - // Expand certain atomics for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { MVT VT = IntVTs[i]; @@ -1369,7 +1361,7 @@ void X86TargetLowering::resetOperationActions() { setPrefFunctionAlignment(4); // 2^4 bytes. } -EVT X86TargetLowering::getSetCCResultType(EVT VT) const { +EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i8; return VT.changeVectorElementTypeToInteger(); } @@ -1803,8 +1795,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; SDValue Ops[] = { Chain, InFlag }; Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT, - MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)), 1); + MVT::Other, MVT::Glue, Ops), 1); Val = Chain.getValue(0); // Round the f80 to the right size, which also moves it to the appropriate @@ -4765,19 +4756,27 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, /// getNumOfConsecutiveZeros - Return the number of elements of a vector /// shuffle operation which come from a consecutively from a zero. The /// search can start in two different directions, from left or right. -static -unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, - bool ZerosFromLeft, SelectionDAG &DAG) { - unsigned i; - for (i = 0; i != NumElems; ++i) { - unsigned Index = ZerosFromLeft ? i : NumElems-i-1; +/// We count undefs as zeros until PreferredNum is reached. +static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, + unsigned NumElems, bool ZerosFromLeft, + SelectionDAG &DAG, + unsigned PreferredNum = -1U) { + unsigned NumZeros = 0; + for (unsigned i = 0; i != NumElems; ++i) { + unsigned Index = ZerosFromLeft ? i : NumElems - i - 1; SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0); - if (!(Elt.getNode() && - (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)))) + if (!Elt.getNode()) + break; + + if (X86::isZeroNode(Elt)) + ++NumZeros; + else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum. + NumZeros = std::min(NumZeros + 1, PreferredNum); + else break; } - return i; + return NumZeros; } /// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE) @@ -4815,8 +4814,9 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, - false /* check zeros from right */, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros( + SVOp, NumElems, false /* check zeros from right */, DAG, + SVOp->getMaskElt(0)); unsigned OpSrc; if (!NumZeros) @@ -4848,8 +4848,9 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, - true /* check zeros from left */, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros( + SVOp, NumElems, true /* check zeros from left */, DAG, + NumElems - SVOp->getMaskElt(NumElems - 1) - 1); unsigned OpSrc; if (!NumZeros) @@ -5108,15 +5109,27 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, // load of the entire vector width starting at the base pointer. If we found // consecutive loads for the low half, generate a vzext_load node. if (LastLoadedElt == NumElems - 1) { + SDValue NewLd = SDValue(); if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) - return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), - LDBase->getPointerInfo(), - LDBase->isVolatile(), LDBase->isNonTemporal(), - LDBase->isInvariant(), 0); - return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), - LDBase->getPointerInfo(), - LDBase->isVolatile(), LDBase->isNonTemporal(), - LDBase->isInvariant(), LDBase->getAlignment()); + NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), + LDBase->getPointerInfo(), + LDBase->isVolatile(), LDBase->isNonTemporal(), + LDBase->isInvariant(), 0); + NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), + LDBase->getPointerInfo(), + LDBase->isVolatile(), LDBase->isNonTemporal(), + LDBase->isInvariant(), LDBase->getAlignment()); + + if (LDBase->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(LDBase, 1), + SDValue(NewLd.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), + SDValue(NewLd.getNode(), 1)); + } + + return NewLd; } if (NumElems == 4 && LastLoadedElt == 1 && DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { @@ -6721,10 +6734,10 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)). unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits(); EVT FullVT = V.getValueType(); - EVT SubVecVT = EVT::getVectorVT(*Context, + EVT SubVecVT = EVT::getVectorVT(*Context, FullVT.getVectorElementType(), FullVT.getVectorNumElements()/Ratio); - V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V, + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V, DAG.getIntPtrConstant(0)); } V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); @@ -6880,6 +6893,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { TargetMask, DAG); } + if (isPALIGNRMask(M, VT, Subtarget)) + return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, + getShufflePALIGNRImmediate(SVOp), + DAG); + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; @@ -6997,11 +7015,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // inlined here right now to enable us to directly emit target specific // nodes, and remove one by one until they don't return Op anymore. - if (isPALIGNRMask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, - getShufflePALIGNRImmediate(SVOp), - DAG); - if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { if (VT == MVT::v2f64 || VT == MVT::v2i64) @@ -8249,7 +8262,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, APInt FF(32, 0x5F800000ULL); // Check whether the sign bit is set. - SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), + SDValue SignSet = DAG.getSetCC(dl, + getSetCCResultType(*DAG.getContext(), MVT::i64), Op.getOperand(0), DAG.getConstant(0, MVT::i64), ISD::SETLT); @@ -9189,14 +9203,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip - // the condition code later. - bool Invert = false; - if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) { - Invert = true; - LHS = LHS.getOperand(0); - } - // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know // that doing a bittest on the i32 value is ok. We extend to i32 because @@ -9213,9 +9219,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - // Flip the condition if the LHS was a not instruction - if (Invert) - Cond = X86::GetOppositeBranchCondition(Cond); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(Cond, MVT::i8), BT); } @@ -9356,29 +9359,31 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (Swap) std::swap(Op0, Op1); - // Since SSE has no unsigned integer comparisons, we need to flip the sign - // bits of the inputs before performing those operations. - if (FlipSigns) { - EVT EltVT = VT.getVectorElementType(); - SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), - EltVT); - std::vector SignBits(VT.getVectorNumElements(), SignBit); - SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0], - SignBits.size()); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec); - } - // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). if (VT == MVT::v2i64) { if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) { assert(Subtarget->hasSSE2() && "Don't know how to lower!"); - // First cast everything to the right type, + // First cast everything to the right type. Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + // Since SSE has no unsigned integer comparisons, we need to flip the sign + // bits of the inputs before performing those operations. The lower + // compare is always unsigned. + SDValue SB; + if (FlipSigns) { + SB = DAG.getConstant(0x80000000U, MVT::v4i32); + } else { + SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32); + SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32); + SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Sign, Zero, Sign, Zero); + } + Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB); + // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)) SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); @@ -9404,7 +9409,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); - // First cast everything to the right type, + // First cast everything to the right type. Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); @@ -9423,6 +9428,15 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, } } + // Since SSE has no unsigned integer comparisons, we need to flip the sign + // bits of the inputs before performing those operations. + if (FlipSigns) { + EVT EltVT = VT.getVectorElementType(); + SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); + } + SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); // If the logical-not of the result is required, perform that now. @@ -11039,7 +11053,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && VT == MVT::i64) || + (FrameReg == X86::EBP && VT == MVT::i32)) && + "Invalid Frame Register!"); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -11059,21 +11076,23 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Subtarget->is64Bit() ? X86::RBP : X86::EBP, - getPointerTy()); - unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); - - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize())); - StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); + EVT PtrVT = getPointerTy(); + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || + (FrameReg == X86::EBP && PtrVT == MVT::i32)) && + "Invalid Frame Register!"); + SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); + unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize())); + StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - return DAG.getNode(X86ISD::EH_RETURN, dl, - MVT::Other, - Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); + return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, + DAG.getRegister(StoreAddrReg, PtrVT)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, @@ -12125,52 +12144,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } } -static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - - // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. - // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { - SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(0, MVT::i32), // Disp - DAG.getRegister(0, MVT::i32), // Segment. - Zero, - Chain - }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); - return SDValue(Res, 0); - } - - unsigned isDev = cast(Op.getOperand(5))->getZExtValue(); - if (!isDev) - return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - - unsigned Op1 = cast(Op.getOperand(1))->getZExtValue(); - unsigned Op2 = cast(Op.getOperand(2))->getZExtValue(); - unsigned Op3 = cast(Op.getOperand(3))->getZExtValue(); - unsigned Op4 = cast(Op.getOperand(4))->getZExtValue(); - - // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; - if (!Op1 && !Op2 && !Op3 && Op4) - return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; - if (Op1 && !Op2 && !Op3 && !Op4) - return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), - // (MFENCE)>; - return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); -} - static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -12199,9 +12172,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, Zero, Chain }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); + SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops); return SDValue(Res, 0); } @@ -12404,7 +12375,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -16305,8 +16275,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts -/// when possible. +/// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -17467,7 +17436,7 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, +static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); if (!VT.isVector()) @@ -17487,14 +17456,14 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, N0.getOpcode() == ISD::SIGN_EXTEND)) { SDValue N00 = N0.getOperand(0); - // EXTLOAD has a better solution on AVX2, + // EXTLOAD has a better solution on AVX2, // it may be replaced with X86ISD::VSEXT node. if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256()) if (!ISD::isNormalLoad(N00.getNode())) return SDValue(); if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { - SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, + SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1); return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); }