#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
+ // For 32 bit Atom, use Hybrid (register pressure + latency) scheduling.
if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
+ else if (Subtarget->isAtom())
+ setSchedulingPreference(Sched::Hybrid);
else
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
// registers.
if (UseRegMask) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- Ops.push_back(DAG.getRegisterMask(Mask));
+ if (const uint32_t *Mask = TRI->getCallPreservedMask(CallConv))
+ Ops.push_back(DAG.getRegisterMask(Mask));
}
if (InFlag.getNode())
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
+/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
if (!HasAVX)
return false;
return true;
}
-/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions.
-static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
-
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- // Although the mask is equal for both lanes do it twice to get the cases
- // where a mask will match because the same mask element is undef on the
- // first half but valid on the second. This would get pathological cases
- // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
- unsigned Shift = (LaneSize == 4) ? 2 : 1;
- unsigned Mask = 0;
- for (unsigned i = 0; i != NumElts; ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
- continue;
- MaskElt %= LaneSize;
- unsigned Shamt = i;
- // VPERMILPSY, the mask of the first half must be equal to the second one
- if (NumElts == 8) Shamt %= LaneSize;
- Mask |= MaskElt << (Shamt*Shift);
- }
-
- return Mask;
-}
-
/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
/// of what x86 movss want. X86 movs requires the lowest element to be lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(EVT VT, bool HasSSE2, bool HasAVX2,
+static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 128) { // SSE
- if (HasSSE2) { // SSE2
+ if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.getSizeInBits() == 256) { // AVX
- if (HasAVX2) { // AVX2
+ if (Subtarget->hasAVX2()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
-static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- bool Changed = false;
- SmallVector<int, 8> MaskVec(SVOp->getMask().begin(), SVOp->getMask().end());
-
+static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
- if (MaskVec[i] > (int)NumElems) {
- MaskVec[i] = NumElems;
- Changed = true;
+ if (Mask[i] > (int)NumElems) {
+ Mask[i] = NumElems;
}
}
- if (Changed)
- return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
- SVOp->getOperand(1), &MaskVec[0]);
- return SDValue(SVOp, 0);
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
SelectionDAG &DAG) {
EVT VT = V2.getValueType();
SDValue V1 = IsZero
- ? getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(), DAG,
- V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ ? getZeroVector(VT, Subtarget, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
if (Index < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- int NumElems = VT.getVectorNumElements();
- SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0)
+ : SV->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
SmallVector<unsigned, 16> ShuffleMask;
SDValue ImmN;
DecodeMOVLHPSMask(NumElems, ShuffleMask);
break;
case X86ISD::PSHUFD:
+ case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
case X86ISD::PSHUFHW:
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
- case X86ISD::VPERMILP:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
case X86ISD::MOVDDUP:
case X86ISD::MOVSLDUP:
case X86ISD::PALIGN:
return SDValue(); // Not yet implemented.
- default:
- assert(0 && "unknown target shuffle node");
- return SDValue();
+ default: llvm_unreachable("unknown target shuffle node");
}
Index = ShuffleMask[Index];
if (Index < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0)
+ : N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
Depth+1);
}
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, /*HasSSE2*/ true, /*HasAVX2*/ false,
- DAG, dl);
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 4)
return SDValue();
if (isNonZero) {
if (First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, /*HasSSE2*/ true, /*HasAVX2*/ false,
- DAG, dl);
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
if (!ISD::isNormalLoad(Ld.getNode()))
return SDValue();
+ // Reject loads that have uses of the chain result
+ if (Ld->hasAnyUseOfValue(1))
+ return SDValue();
+
bool Is256 = VT.getSizeInBits() == 256;
bool Is128 = VT.getSizeInBits() == 128;
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
if (VT == MVT::v4i32 || VT == MVT::v8i32)
return Op;
- return getZeroVector(VT, Subtarget->hasSSE2(),
- Subtarget->hasAVX2(), DAG, dl);
+ return getZeroVector(VT, Subtarget, DAG, dl);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
if (VT.getSizeInBits() == 256) {
- SDValue ZeroVec = getZeroVector(VT, Subtarget->hasSSE2(),
- Subtarget->hasAVX2(), DAG, dl);
+ SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.getSizeInBits() == 256) {
- SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget->hasSSE2(),
- Subtarget->hasAVX2(), DAG, dl);
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
DAG, dl);
} else {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ if (VT.getSizeInBits() == 256) {
SmallVector<SDValue, 32> V;
- for (unsigned i = 0; i < NumElems; ++i)
+ for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ Subtarget, *this);
if (V.getNode()) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ Subtarget, *this);
if (V.getNode()) return V;
}
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
- DAG, dl);
+ V[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
- static_cast<int>(Reverse2 ? 1-NumElems : NumElems),
- static_cast<int>(Reverse2 ? NumElems : 1+NumElems)
+ static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
+ static_cast<int>(Reverse2 ? NumElems : NumElems+1)
};
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
// mask values count as coming from any quadword, for better codegen.
unsigned LoQuad[] = { 0, 0, 0, 0 };
unsigned HiQuad[] = { 0, 0, 0, 0 };
- BitVector InputQuads(4);
+ std::bitset<4> InputQuads;
for (unsigned i = 0; i < 8; ++i) {
unsigned *Quad = i < 4 ? LoQuad : HiQuad;
int EltIdx = SVOp->getMaskElt(i);
bool V2Used = InputQuads[2] || InputQuads[3];
if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
- BestLoQuad = InputQuads.find_first();
- BestHiQuad = InputQuads.find_next(BestLoQuad);
+ BestLoQuad = InputQuads[0] ? 0 : 1;
+ BestHiQuad = InputQuads[2] ? 2 : 3;
}
if (InputQuads.count() > 2) {
BestLoQuad = -1;
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT NewVT;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Unexpected!");
+ default: llvm_unreachable("Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
- DAG, dl);
+ return getZeroVector(VT, Subtarget, DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
+
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
V1IsSplat = isSplatVector(V1.getNode());
V2IsSplat = isSplatVector(V2.getNode());
+ SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
+
// Canonicalize the splat or undef, if present, to be on the RHS.
- if (V1IsSplat && !V2IsSplat) {
- Op = CommuteVectorShuffle(SVOp, DAG);
- SVOp = cast<ShuffleVectorSDNode>(Op);
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
+ if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
Commuted = true;
}
- ArrayRef<int> M = SVOp->getMask();
-
if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
if (V2IsUndef)
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
- // new vector_shuffle with the corrected mask.
- SDValue NewMask = NormalizeMask(SVOp, DAG);
- ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
- if (NSVOp != SVOp) {
- if (X86::isUNPCKLMask(NSVOp, HasAVX2, true)) {
- return NewMask;
- } else if (X86::isUNPCKHMask(NSVOp, HasAVX2, true)) {
- return NewMask;
- }
+ // new vector_shuffle with the corrected mask.p
+ SmallVector<int, 8> NewMask(M.begin(), M.end());
+ NormalizeMask(NewMask, NumElems);
+ if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+ } else if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
}
if (Commuted) {
// Commute is back and try unpck* again.
// FIXME: this seems wrong.
- SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
- ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
+ std::swap(V1IsSplat, V2IsSplat);
+ Commuted = false;
- if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasAVX))
+ if (isVPERMILPMask(M, VT, HasAVX)) {
+ if (HasAVX2 && VT == MVT::v8i32)
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
- getShuffleVPERMILPImmediate(SVOp), DAG);
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+ }
// Handle VPERM2F128/VPERM2I128 permutations
if (isVPERM2X128Mask(M, VT, HasAVX))
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- SmallVector<Constant*,4> CV0;
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- Constant *C0 = ConstantVector::get(CV0);
+ const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ Constant *C0 = ConstantDataVector::get(*Context, CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
SmallVector<Constant*,2> CV1;
if (isFP) {
unsigned SSECC = 8;
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
bool Swap = false;
}
// Arithmetic intrinsics.
+ case Intrinsic::x86_sse2_pmulu_dq:
+ case Intrinsic::x86_avx2_pmulu_dq:
+ return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx2_psrav_d_256:
return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
- case Intrinsic::x86_sse2_pcmpeq_b:
- case Intrinsic::x86_sse2_pcmpeq_w:
- case Intrinsic::x86_sse2_pcmpeq_d:
- case Intrinsic::x86_sse41_pcmpeqq:
- case Intrinsic::x86_avx2_pcmpeq_b:
- case Intrinsic::x86_avx2_pcmpeq_w:
- case Intrinsic::x86_avx2_pcmpeq_d:
- case Intrinsic::x86_avx2_pcmpeq_q:
- return DAG.getNode(X86ISD::PCMPEQ, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- case Intrinsic::x86_sse2_pcmpgt_b:
- case Intrinsic::x86_sse2_pcmpgt_w:
- case Intrinsic::x86_sse2_pcmpgt_d:
- case Intrinsic::x86_sse42_pcmpgtq:
- case Intrinsic::x86_avx2_pcmpgt_b:
- case Intrinsic::x86_avx2_pcmpgt_w:
- case Intrinsic::x86_avx2_pcmpgt_d:
- case Intrinsic::x86_avx2_pcmpgt_q:
- return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::x86_avx_vpermil_ps:
+ case Intrinsic::x86_avx_vpermil_pd:
+ case Intrinsic::x86_avx_vpermil_ps_256:
+ case Intrinsic::x86_avx_vpermil_pd_256:
+ return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ "Only know how to lower V2I64/V4I64 multiply");
+
DebugLoc dl = Op.getDebugLoc();
+ // Ahi = psrlqi(a, 32);
+ // Bhi = psrlqi(b, 32);
+ //
+ // AloBlo = pmuludq(a, b);
+ // AloBhi = pmuludq(a, Bhi);
+ // AhiBlo = pmuludq(Ahi, b);
+
+ // AloBhi = psllqi(AloBhi, 32);
+ // AhiBlo = psllqi(AhiBlo, 32);
+ // return AloBlo + AloBhi + AhiBlo;
+
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
- if (VT == MVT::v4i64) {
- assert(Subtarget->hasAVX2() && "Lowering v4i64 multiply requires AVX2");
+ SDValue ShAmt = DAG.getConstant(32, MVT::i32);
- // ulong2 Ahi = __builtin_ia32_psrlqi256( a, 32);
- // ulong2 Bhi = __builtin_ia32_psrlqi256( b, 32);
- // ulong2 AloBlo = __builtin_ia32_pmuludq256( a, b );
- // ulong2 AloBhi = __builtin_ia32_pmuludq256( a, Bhi );
- // ulong2 AhiBlo = __builtin_ia32_pmuludq256( Ahi, b );
- //
- // AloBhi = __builtin_ia32_psllqi256( AloBhi, 32 );
- // AhiBlo = __builtin_ia32_psllqi256( AhiBlo, 32 );
- // return AloBlo + AloBhi + AhiBlo;
-
- SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A,
- DAG.getConstant(32, MVT::i32));
- SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B,
- DAG.getConstant(32, MVT::i32));
- SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
- A, B);
- SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
- A, Bhi);
- SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
- Ahi, B);
- AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi,
- DAG.getConstant(32, MVT::i32));
- AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo,
- DAG.getConstant(32, MVT::i32));
- SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
- Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
- return Res;
- }
+ SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
+ SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
- assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ // Bit cast to 32-bit vectors for MULUDQ
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
+ B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
+ Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
+ Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
- // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
- // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
- // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
- // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
- // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
- //
- // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
- // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
- // return AloBlo + AloBhi + AhiBlo;
+ SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
+ SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+
+ AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
+ AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
- SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A,
- DAG.getConstant(32, MVT::i32));
- SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B,
- DAG.getConstant(32, MVT::i32));
- SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- A, B);
- SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- A, Bhi);
- SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- Ahi, B);
- AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi,
- DAG.getConstant(32, MVT::i32));
- AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo,
- DAG.getConstant(32, MVT::i32));
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
- Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
- return Res;
+ return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
- /* HasAVX2 */false, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */,
- true /* HasAVX2 */, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
DAG.getConstant(23, MVT::i32));
- ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
- Constant *C = ConstantVector::getSplat(4, CI);
+ const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
+ Constant *C = ConstantDataVector::get(*Context, CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
- default: assert(0 && "Invalid code");
+ default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = X86ISD::ADD; break;
case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
case ISD::SUBC: Opc = X86ISD::SUB; break;
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
- assert(false && "Do not know how to custom type legalize this operation!");
- return;
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
+ case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
return EndMBB;
}
+// The EFLAGS operand of SelectItr might be missing a kill marker
+// because there were multiple uses of EFLAGS, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of EFLAGS.
+ MachineBasicBlock::iterator miI(llvm::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(X86::EFLAGS))
+ return false;
+ if (mi.definesRegister(X86::EFLAGS))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether EFLAGS is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and EFLAGS wasn't live
+ // out. SelectMI should have a kill flag on EFLAGS.
+ SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
+ return true;
+}
+
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- if (!MI->killsRegister(X86::EFLAGS)) {
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ if (!MI->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: assert(0 && "Unexpected instr type to insert");
+ default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
- assert(0 && "TAILJMP64 would not be touched here.");
+ llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
case Intrinsic::x86_avx2_pmovmskb: {
// High bits of movmskp{s|d}, pmovmskb are known zero.
switch (IntId) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- bool HasAVX2) {
+ const X86Subtarget* Subtarget) {
DebugLoc dl = N->getDebugLoc();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
// Emit a zeroed vector and insert the desired subvector on its
// first half.
- SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, HasAVX2, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
DAG.getConstant(0, MVT::i32), DAG, dl);
return DCI.CombineTo(N, InsV);
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
- return PerformShuffleCombine256(N, DAG, DCI, Subtarget->hasAVX2());
+ return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
if (VT.getSizeInBits() != 128)
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
+
+/// PerformTruncateCombine - Converts truncate operation to
+/// a sequence of vector shuffle operations.
+/// It is possible when we truncate 256-bit vector to 128-bit vector
+
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+ DAGCombinerInfo &DCI) const {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasAVX()) return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+ // PSHUFD
+ int ShufMask1[] = {0, 2, 0, 0};
+
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT),
+ ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT),
+ ShufMask1);
+
+ // MOVLHPS
+ int ShufMask2[] = {0, 1, 4, 5};
+
+ return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
+ }
+ if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
+
+ // PSHUFB
+ int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo,
+ DAG.getUNDEF(MVT::v16i8),
+ ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi,
+ DAG.getUNDEF(MVT::v16i8),
+ ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+ // MOVLHPS
+ int ShufMask2[] = {0, 1, 4, 5};
+
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
+ }
+
+ return SDValue();
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SHL) {
BaseShAmt = InVec.getOperand(1);
}
}
- if (BaseShAmt.getNode() == 0)
+ if (BaseShAmt.getNode() == 0) {
+ // Don't create instructions with illegal types after legalize
+ // types has run.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
+ !DCI.isBeforeLegalize())
+ return SDValue();
+
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
DAG.getIntPtrConstant(0));
+ }
} else
return SDValue();
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
- for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
return SDValue();
}
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ // Optimize vectors in AVX mode
+ // Sign extend v8i16 to v8i32 and
+ // v4i32 to v4i64
+ //
+ // Divide input vector into two parts
+ // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
+ // concat the vectors to original VT
+
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
+ (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
+
+ unsigned NumElems = OpVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+ ShufMask1.data());
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+ ShufMask2.data());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ }
+ return SDValue();
+}
+
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
- SDValue ZeroVec = getZeroVector(OpVT, Subtarget->hasSSE2(), Subtarget->hasAVX2(),
- DAG, dl);
+ SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG);
SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN: