// Forward declarations.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG);
-X86TargetLowering::X86TargetLowering(TargetMachine &TM)
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
if (!Subtarget->hasSSE2())
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // Expand certain atomics
setOperationAction(ISD::ATOMIC_LCS , MVT::i8, Custom);
setOperationAction(ISD::ATOMIC_LCS , MVT::i16, Custom);
setOperationAction(ISD::ATOMIC_LCS , MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LCS , MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LSS , MVT::i32, Expand);
// Use the default ISD::LOCATION, ISD::DECLARE expansion.
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
- if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
- else
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::VSETCC, (MVT::ValueType)VT, Expand);
}
if (Subtarget->hasMMX()) {
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4f32, Legal);
}
if (Subtarget->hasSSE2()) {
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Legal);
+
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
}
if (Subtarget->hasSSE41()) {
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
  // i8 and i16 vectors are custom, because the source register and source
  // memory operand types are not the same width. f32 vectors are
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::STORE);
return Align;
}
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+/// determining it.
+MVT::ValueType
+X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const {
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+ return MVT::v4i32;
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+ return MVT::v4f32;
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
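+// For example, copying 16 or more bytes from a constant or string source
+// returns MVT::v4i32 with SSE2 (MVT::v4f32 with only SSE1), an 8 byte or
+// larger operation on x86-64 returns MVT::i64, and everything else falls
+// back to MVT::i32.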
+
+
/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
}
/// isScalarLoadToVector - Returns true if the node is a scalar load that
-/// is promoted to a vector.
-static inline bool isScalarLoadToVector(SDNode *N) {
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
N = N->getOperand(0).Val;
- return ISD::isNON_EXTLoad(N);
+ if (ISD::isNON_EXTLoad(N)) {
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+ }
}
return false;
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
+static SDOperand getZeroVector(MVT::ValueType VT, bool HasSSE2,
+ SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
  // Always build zero vectors as <4 x i32>, <2 x i32> (MMX) or <4 x f32>
  // (SSE1 only), bitcasted to their dest type. This ensures they get CSE'd.
- SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
SDOperand Vec;
- if (MVT::getSizeInBits(VT) == 64) // MMX
+ if (MVT::getSizeInBits(VT) == 64) { // MMX
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
- else // SSE
+ } else if (HasSSE2) { // SSE2
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDOperand Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
- Mask = getZeroVector(MVT::v4i32, DAG);
+ Mask = getZeroVector(MVT::v4i32, true, DAG);
}
V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, unsigned Idx,
- bool isZero, SelectionDAG &DAG) {
+ bool isZero, bool HasSSE2,
+ SelectionDAG &DAG) {
MVT::ValueType VT = V2.getValueType();
- SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
+ SDOperand V1 = isZero
+ ? getZeroVector(VT, HasSSE2, DAG) : DAG.getNode(ISD::UNDEF, VT);
unsigned NumElems = MVT::getVectorNumElements(V2.getValueType());
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+/// getNumOfConsecutiveZeros - Return the number of consecutive zero (or
+/// undef) elements at the low or high end of a shuffle result.
+static
+unsigned getNumOfConsecutiveZeros(SDOperand Op, SDOperand Mask,
+ unsigned NumElems, bool Low,
+ SelectionDAG &DAG) {
+ unsigned NumZeros = 0;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDOperand Idx = Mask.getOperand(Low ? i : NumElems-i-1);
+ if (Idx.getOpcode() == ISD::UNDEF) {
+ ++NumZeros;
+ continue;
+ }
+ unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
+ SDOperand Elt = DAG.getShuffleScalarElt(Op.Val, Index);
+ if (Elt.Val && isZeroNode(Elt))
+ ++NumZeros;
+ else
+ break;
+ }
+ return NumZeros;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(SDOperand Op, SDOperand Mask, SelectionDAG &DAG,
+ bool &isLeft, SDOperand &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = Mask.getNumOperands();
+
+ isLeft = true;
+ unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
+ if (!NumZeros) {
+ isLeft = false;
+ NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
+ if (!NumZeros)
+ return false;
+ }
+
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+ for (unsigned i = NumZeros; i < NumElems; ++i) {
+ unsigned Val = isLeft ? (i - NumZeros) : i;
+ SDOperand Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));
+ if (Idx.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
+ if (Index < NumElems)
+ SeenV1 = true;
+ else {
+ Index -= NumElems;
+ SeenV2 = true;
+ }
+ if (Index != Val)
+ return false;
+ }
+ if (SeenV1 && SeenV2)
+ return false;
+
+ ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);
+ ShAmt = NumZeros;
+ return true;
+}
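+// For example, if V2 is an all-zeros vector, the v4i32 mask <4, 0, 1, 2>
+// places a zero in element 0 and moves V1's elements up one slot, so the
+// shuffle is recognized as a logical left shift of V1 by one element
+// (isLeft == true, ShVal == V1, ShAmt == 1).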
+
+
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, DAG);
+ V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
if (isNonZero) {
if (First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, DAG);
+ V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
return V;
}
+/// getVShift - Return a vector logical shift node.
+///
+static SDOperand getVShift(bool isLeft, MVT::ValueType VT, SDOperand SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ bool isMMX = MVT::getSizeInBits(VT) == 64;
+ MVT::ValueType ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(Opc, ShVT, SrcOp,
+ DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+}
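+// The source is bit_converted to v2i64 (v1i64 for MMX) so the whole register
+// is shifted as a single unit; NumBits is the shift amount in bits, and the
+// result is bit_converted back to VT.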
+
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor, all ones are handled with pcmpeqd.
if (ISD::isBuildVectorAllOnes(Op.Val))
return getOnesVector(Op.getValueType(), DAG);
- return getZeroVector(Op.getValueType(), DAG);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG);
}
MVT::ValueType VT = Op.getValueType();
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true, DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
(EVT != MVT::i64 || Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
+ return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = MVT::getSizeInBits(VT);
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
- if (EVTBits == 64)
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDOperand V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasSSE2(), DAG);
+ }
return SDOperand();
+ }
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, DAG);
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
}
}
- // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
- // clears the upper bits.
- // FIXME: we can do the same for v4f32 case when we know both parts of
- // the lower half come from scalar_to_vector (loadf32). We should do
- // that in post legalizer dag combiner with target specific hooks.
- if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
- return V[0];
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
&MaskVec[0], MaskVec.size()));
}
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDOperand getVZextMovL(MVT::ValueType VT, MVT::ValueType OpVT,
+ SDOperand SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.Val, &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT::ValueType EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
+ SrcOp.getOperand(0).getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
+ DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
+}
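+// X86ISD::VZEXT_MOVL moves the low element into place and zeroes the
+// remaining elements. LowerVECTOR_SHUFFLE below uses getVZextMovL whenever
+// one shuffle operand is an all-zeros vector and the mask is a MOVL pattern.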
+
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
return DAG.getNode(ISD::UNDEF, VT);
if (isZeroShuffle(Op.Val))
- return getZeroVector(VT, DAG);
+ return getZeroVector(VT, Subtarget->hasSSE2(), DAG);
if (isIdentityMask(PermMask.Val))
return V1;
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.Val)) {
- SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+ DAG, *this);
if (NewOp.Val) {
SDOperand NewV1 = NewOp.getOperand(0);
SDOperand NewV2 = NewOp.getOperand(1);
SDOperand NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.Val, true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
- NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
- NewV1, NewV2, getMOVLMask(2, DAG));
- return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
}
}
} else if (ISD::isBuildVectorAllZeros(V1.Val)) {
- SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+ DAG, *this);
if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
- return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget);
}
}
- if (X86::isMOVLMask(PermMask.Val))
- return (V1IsUndef) ? V2 : Op;
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDOperand ShVal;
+ bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ ShAmt *= MVT::getSizeInBits(EVT);
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
+ }
+
+ if (X86::isMOVLMask(PermMask.Val)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.Val))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget);
+ return Op;
+ }
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ ShAmt *= MVT::getSizeInBits(EVT);
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
+ }
+
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
+SDOperand X86TargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand SrcPtr = Op.getOperand(1);
+ SDOperand SrcSV = Op.getOperand(2);
+
+ assert(0 && "VAArgInst is not yet implemented for x86-64!");
+ abort();
+ return SDOperand();
+}
+
SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
- const unsigned char N86R10 =
- ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
- const unsigned char N86R11 =
- ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+ const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+ const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
- const unsigned char N86Reg =
- ((const X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
+ const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, TrmpAddr, 0);
return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val;
}
+SDNode* X86TargetLowering::ExpandATOMIC_LSS(SDNode* Op, SelectionDAG &DAG) {
+ MVT::ValueType T = cast<AtomicSDNode>(Op)->getVT();
+ assert (T == MVT::i32 && "Only know how to expand i32 LSS");
+ SDOperand negOp = DAG.getNode(ISD::SUB, T,
+ DAG.getConstant(0, T), Op->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LAS, Op->getOperand(0),
+ Op->getOperand(1), negOp, T).Val;
+}
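+// That is, an i32 atomic subtract is expanded into an atomic add of the
+// negated operand: atomic_lss(addr, x) becomes atomic_las(addr, 0 - x).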
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::RET: return LowerRET(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
case ISD::ATOMIC_LCS: return ExpandATOMIC_LCS(N, DAG);
+ case ISD::ATOMIC_LSS: return ExpandATOMIC_LSS(N, DAG);
}
}
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
- case X86ISD::LCMPXCHG_DAG: return "x86ISD::LCMPXCHG_DAG";
- case X86ISD::LCMPXCHG8_DAG: return "x86ISD::LCMPXCHG8_DAG";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
}
}
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc) {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bnz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
+ F->getBasicBlockList().insert(MBBIter, newMBB);
+ F->getBasicBlockList().insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[6];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = 3; // [0,3]
+ int valArgIndx = 4;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
+ && "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, TII->get(immOpc), t2);
+ MIB.addReg(t1);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+ delete bInstr; // The pseudo instruction is gone now.
+ return nextMBB;
+}
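+// EmitInstrWithCustomInserter routes the ATOMAND32 / ATOMOR32 / ATOMXOR32
+// pseudo instructions here with the corresponding rr / ri ALU opcodes (see
+// the switch further below).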
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [min/max.addr], t2 [EAX is implicit]
+ // bnz newMBB
+ // fallthrough -->nextMBB
+ //
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
+ F->getBasicBlockList().insert(MBBIter, newMBB);
+ F->getBasicBlockList().insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[6];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = 3; // [0,3]
+ int valArgIndx = 4;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
+ && "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
+ else
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+ // Generate cmov
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, TII->get(cmovOpc),t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
+
+ // Compare and exchange if nothing has modified the memory location
+ MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+
+ MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+ delete mInstr; // The pseudo instruction is gone now.
+ return nextMBB;
+}
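+// Likewise, the ATOMMIN32 / ATOMMAX32 / ATOMUMIN32 / ATOMUMAX32 pseudos are
+// handled here with the matching conditional-move opcode (CMOVL32rr,
+// CMOVG32rr, CMOVB32rr, CMOVA32rr); see the switch further below.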
+
+
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) {
MachineFunction *F = BB->getParent();
F->getBasicBlockList().insert(It, copy0MBB);
F->getBasicBlockList().insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
+ // Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+ sinkMBB->transferSuccessors(BB);
+
+ // Add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
delete MI; // The pseudo instruction is gone now.
return BB;
}
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
}
}
}
}
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
-/// element of the result of the vector shuffle.
-static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
- MVT::ValueType VT = N->getValueType(0);
- SDOperand PermMask = N->getOperand(2);
- unsigned NumElems = PermMask.getNumOperands();
- SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
- i %= NumElems;
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return (i == 0)
- ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
- } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
- SDOperand Idx = PermMask.getOperand(i);
- if (Idx.getOpcode() == ISD::UNDEF)
- return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
- return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
- }
- return SDOperand();
-}
-
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
-/// node is a GlobalAddress + an offset.
-static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
- unsigned Opc = N->getOpcode();
- if (Opc == X86ISD::Wrapper) {
- if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ GlobalValue* &GA, int64_t &Offset) const{
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
return true;
}
- } else if (Opc == ISD::ADD) {
- SDOperand N1 = N->getOperand(0);
- SDOperand N2 = N->getOperand(1);
- if (isGAPlusOffset(N1.Val, GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
- if (V) {
- Offset += V->getSignExtended();
- return true;
- }
- } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
- if (V) {
- Offset += V->getSignExtended();
- return true;
- }
- }
}
- return false;
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// isConsecutiveLoad - Returns true if N is loading from an address of Base
-/// + Dist * Size.
-static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
- MachineFrameInfo *MFI) {
- if (N->getOperand(0).Val != Base->getOperand(0).Val)
- return false;
-
- SDOperand Loc = N->getOperand(1);
- SDOperand BaseLoc = Base->getOperand(1);
- if (Loc.getOpcode() == ISD::FrameIndex) {
- if (BaseLoc.getOpcode() != ISD::FrameIndex)
- return false;
- int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
- int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
- if (FS != BFS || FS != Size) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
- } else {
- GlobalValue *GV1 = NULL;
- GlobalValue *GV2 = NULL;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
- bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
- bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
- if (isGA1 && isGA2 && GV1 == GV2)
- return Offset1 == (Offset2 + Dist*Size);
- }
-
- return false;
-}
-
-static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
- const X86Subtarget *Subtarget) {
+static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
+ const TargetLowering &TLI) {
GlobalValue *GV;
int64_t Offset = 0;
- if (isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
+ if (TLI.isGAPlusOffset(Base, GV, Offset))
+ return (GV->getAlignment() >= N && (Offset % N) == 0);
// DAG combine handles the stack object case.
return false;
}
+static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
+ unsigned NumElems, MVT::ValueType EVT,
+ SDNode *&Base,
+ SelectionDAG &DAG, MachineFrameInfo *MFI,
+ const TargetLowering &TLI) {
+ Base = NULL;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDOperand Idx = PermMask.getOperand(i);
+ if (Idx.getOpcode() == ISD::UNDEF) {
+ if (!Base)
+ return false;
+ continue;
+ }
+
+ unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
+ SDOperand Elt = DAG.getShuffleScalarElt(N, Index);
+ if (!Elt.Val ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
+ return false;
+ if (!Base) {
+ Base = Elt.Val;
+ if (Base->getOpcode() == ISD::UNDEF)
+ return false;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (!TLI.isConsecutiveLoad(Elt.Val, Base,
+ MVT::getSizeInBits(EVT)/8, i, MFI))
+ return false;
+ }
+ return true;
+}
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetLowering &TLI) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MVT::ValueType VT = N->getValueType(0);
MVT::ValueType EVT = MVT::getVectorElementType(VT);
SDOperand PermMask = N->getOperand(2);
- int NumElems = (int)PermMask.getNumOperands();
+ unsigned NumElems = PermMask.getNumOperands();
SDNode *Base = NULL;
- for (int i = 0; i < NumElems; ++i) {
- SDOperand Idx = PermMask.getOperand(i);
- if (Idx.getOpcode() == ISD::UNDEF) {
- if (!Base) return SDOperand();
- } else {
- SDOperand Arg =
- getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
- if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
- return SDOperand();
- if (!Base)
- Base = Arg.Val;
- else if (!isConsecutiveLoad(Arg.Val, Base,
- i, MVT::getSizeInBits(EVT)/8,MFI))
- return SDOperand();
- }
- }
+ if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
+ DAG, MFI, TLI))
+ return SDOperand();
- bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
LoadSDNode *LD = cast<LoadSDNode>(Base);
- if (isAlign16) {
+ if (isBaseAlignmentOfN(16, Base->getOperand(1).Val, TLI))
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
LD->getSrcValueOffset(), LD->isVolatile());
- } else {
- return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->isVolatile(),
- LD->getAlignment());
- }
+ return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->isVolatile(),
+ LD->getAlignment());
}
+/// PerformBuildVectorCombine - build_vector (load i64 / f64), 0 -> movq / movsd.
+static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget,
+ const TargetLowering &TLI) {
+ unsigned NumOps = N->getNumOperands();
+
+ // Ignore single operand BUILD_VECTOR.
+ if (NumOps == 1)
+ return SDOperand();
+
+ MVT::ValueType VT = N->getValueType(0);
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ // We are looking for load i64 and zero extend. We want to transform
+ // it before legalizer has a chance to expand it. Also look for i64
+ // BUILD_PAIR bit casted to f64.
+ return SDOperand();
+ // This must be an insertion into a zero vector.
+ SDOperand HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDOperand();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).Val;
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDOperand();
+ Base = Base->getOperand(0).Val;
+ if (!isa<LoadSDNode>(Base))
+ return SDOperand();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDOperand();
+
+ return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
+}
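+// For example, on a 32-bit SSE2 target, (build_vector (load f64 addr), 0.0)
+// becomes (X86ISD::VZEXT_LOAD addr), which matches the movq / movsd patterns,
+// so the upper half of the register is zeroed by the load itself rather than
+// by an extra instruction.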
+
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
-static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
+static SDOperand PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
+ StoreSDNode *St = cast<StoreSDNode>(N);
if (MVT::isVector(St->getValue().getValueType()) &&
MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
isa<LoadSDNode>(St->getValue()) &&
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::BUILD_VECTOR:
+ return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
- case ISD::STORE:
- return PerformSTORECombine(cast<StoreSDNode>(N), DAG, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);