setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ if (Subtarget->isOSWindows() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
-
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
-
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ }
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::AND, MVT::v8i64, Legal);
setOperationAction(ISD::OR, MVT::v8i64, Legal);
setOperationAction(ISD::XOR, MVT::v8i64, Legal);
+ setOperationAction(ISD::AND, MVT::v16i32, Legal);
+ setOperationAction(ISD::OR, MVT::v16i32, Legal);
+ setOperationAction(ISD::XOR, MVT::v16i32, Legal);
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
if (!VT.is512BitVector())
continue;
- if (VT != MVT::v8i64) {
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v8i64);
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v8i64);
- }
if ( EltSize >= 32) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
if (!VT.is512BitVector())
continue;
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v8i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
return false;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool HasFp256,
- bool Commuted = false) {
- if (!HasFp256 && VT.is256BitVector())
- return false;
+static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ bool symetricMaskRequired =
+ (VT.getSizeInBits() >= 256) && (EltSize == 32);
+
// VSHUFPSY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
// DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
+ SmallVector<int, 4> MaskVal(NumLaneElems, -1);
unsigned HalfLaneElems = NumLaneElems/2;
for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
for (unsigned i = 0; i != NumLaneElems; ++i) {
// For VSHUFPSY, the mask of the second half must be the same as the
// first but with the appropriate offsets. This works in the same way as
// VPERMILPS works with masks.
- if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ if (!symetricMaskRequired || Idx < 0)
continue;
- if (!isUndefOrEqual(Idx, Mask[i]+l))
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Idx - l;
+ continue;
+ }
+ if ((signed)(Idx - l) != MaskVal[i])
return false;
}
}
static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- if (VT.is512BitVector())
- return false;
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Unsupported vector type for unpckh");
+ assert(VT.getSizeInBits() >= 128 &&
+ "Unsupported vector type for unpckl");
unsigned NumElts = VT.getVectorNumElements();
+ // 256-bit vectors: 4 or 8 elements are always accepted, 16 or 32 elements
+ // only when HasInt256 is set.
if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ // 512-bit vectors are only expected with elements of at least 32 bits.
+ assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
+ "Unsupported vector type for unpckl");
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
+ // AVX defines UNPCK* to operate independently on 128-bit lanes, so the same
+ // per-lane check is used for 128, 256 and 512-bit vectors.
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
int BitI = Mask[l+i];
int BitI1 = Mask[l+i+1];
+ // Even result slots must be undef or index j.
if (!isUndefOrEqual(BitI, j))
return false;
+ // Odd result slots must be undef or index j + NumElts, or exactly NumElts
+ // when V2IsSplat is set. The two cases are alternatives: applying both
+ // checks would reject every splat mask whose odd slot is not at j == 0.
if (V2IsSplat) {
if (!isUndefOrEqual(BitI1, NumElts))
return false;
} else {
if (!isUndefOrEqual(BitI1, j + NumElts))
return false;
}
}
}
return true;
}
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
-
- if (VT.is512BitVector())
- return false;
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckh");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j+NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
}
// Symmetric in-lane mask. Each lane has 4 elements (for imm8)
-static bool isPermImmMask(ArrayRef<int> Mask, EVT VT, unsigned& Imm8) {
+static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize < 32)
return false;
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
- if (!HasFp256)
+static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ // Reject vectors narrower than 256 bits and elements narrower than 32 bits.
+ if (VT.getSizeInBits() < 256 || EltSize < 32)
return false;
-
+ // With 32-bit elements every 128-bit lane has to use the same in-lane
+ // pattern; for wider elements no cross-lane agreement is checked.
+ bool symetricMaskRequired = (EltSize == 32);
unsigned NumElts = VT.getVectorNumElements();
- // Only match 256-bit with 32/64-bit types
- if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
- return false;
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned LaneSize = NumElts/NumLanes;
+ // 2 or 4 elements in one lane
+
+ // ExpectedMaskVal[i] holds the in-lane index first seen at lane position i,
+ // or -1 while only undef entries have been seen at that position.
+ SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
+ // Each entry must be undef or refer to its own lane (bounds l and
+ // l+LaneSize are passed to isUndefOrInRange, which is defined elsewhere).
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
- if (NumElts != 8 || l == 0)
- continue;
- // VPERMILPS handling
- if (Mask[i] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
- return false;
+ if (symetricMaskRequired) {
+ // The first defined entry at this lane position fixes the pattern.
+ if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
+ ExpectedMaskVal[i] = Mask[i+l] - l;
+ continue;
+ }
+ // Later entries at this position must be undef or repeat that pattern,
+ // shifted by the lane offset l.
+ if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
+ return false;
+ }
}
}
-
return true;
}
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
MVT VT = N->getSimpleValueType(0);
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert((VT.getSizeInBits() >= 128) &&
"Unsupported vector type for PSHUF/SHUFP");
// Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- assert((NumLaneElts == 2 || NumLaneElts == 4) &&
- "Only supports 2 or 4 elements per lane");
+ assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
+ "Only supports 2, 4 or 8 elements per lane");
- unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
+ unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
unsigned Mask = 0;
for (unsigned i = 0; i != NumElts; ++i) {
int Elt = N->getMaskElt(i);
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
MVT VT = SVOp->getSimpleValueType(0);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+ unsigned EltSize = VT.is512BitVector() ? 1 :
+ VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
int Val = 0;
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
array_lengthof(Ops));
}
+ } else if (VT.is512BitVector()) { // AVX-512
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
} else
llvm_unreachable("Unexpected vector type");
}
/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
}
/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
// Generate shuffles which repeat i16 and i8 several times until they can be
// represented by v4f32 and then be manipulated by target supported shuffles.
static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
- EVT VT = V.getValueType();
+ MVT VT = V.getSimpleValueType();
int NumElems = VT.getVectorNumElements();
SDLoc dl(V);
TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
-SDValue
-X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
- SelectionDAG &DAG) const {
+static SDValue
+LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
DAG.getIntPtrConstant(0));
}
- if (!isSplatVector(Op.getNode()))
- llvm_unreachable("Unsupported predicate operation");
-
+ // Splat vector (with undefs)
SDValue In = Op.getOperand(0);
+ for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
+ if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ llvm_unreachable("Unsupported predicate operation");
+ }
+
SDValue EFLAGS, X86CC;
if (In.getOpcode() == ISD::SETCC) {
SDValue Op0 = In.getOperand(0);
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ if (!VT.is512BitVector())
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector()) {
+ if (VT.is256BitVector() || VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
+ // There is no blend with immediate in AVX-512.
+ if (VT.is512BitVector())
+ return SDValue();
+
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
return SDValue();
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(MVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, SDLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
static
SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Canonicalize to v2f64.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
"unsupported shuffle type");
SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
// Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
if (!Subtarget->hasSSE41())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Only AVX2 support 256-bit vector integer extending.
if (!Subtarget->hasInt256() && VT.is256BitVector())
return SDValue();
}
- LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*Context, NBits);
- EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
+ MVT NeVT = MVT::getIntegerVT(NBits);
+ MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift);
if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
if (V1.getOpcode() == ISD::BITCAST &&
V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V1.getOperand(0)
- .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+ V1.getOperand(0).getOperand(0)
+ .getSimpleValueType().getSizeInBits() == SignificantBits) {
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
ConstantSDNode *CIdx =
// selection to fold it. Otherwise, we will short the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
(!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
- if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ MVT FullVT = V.getSimpleValueType();
+ MVT V1VT = V1.getSimpleValueType();
+ if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
// The "ext_vec_elt" node is wider than the result node.
// In this case we should extract subvector from V.
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
- unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
- EVT FullVT = V.getValueType();
- EVT SubVecVT = EVT::getVectorVT(*Context,
- FullVT.getVectorElementType(),
+ unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
+ MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
FullVT.getVectorNumElements()/Ratio);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
DAG.getIntPtrConstant(0));
}
- V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasFp256))
+ if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasFp256)) {
- if (HasInt256 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT)) {
+ if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
VT.is512BitVector()) {
- EVT MaskEltVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getVectorElementType().getSizeInBits());
- EVT MaskVectorVT =
- EVT::getVectorVT(*DAG.getContext(),MaskEltVT, NumElems);
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+ MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems);
SmallVector<SDValue, 16> permclMask;
for (unsigned i = 0; i != NumElems; ++i) {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
-
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
+ SDValue Idx = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Idx)) {
+ if (VecVT.is512BitVector() ||
+ (VecVT.is256BitVector() && Subtarget->hasInt256() &&
+ VecVT.getVectorElementType().getSizeInBits() == 32)) {
+
+ MVT MaskEltVT =
+ MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
+ MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
+ MaskEltVT.getSizeInBits());
+
+ Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
+ SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
+ getZeroVector(MaskVT, Subtarget, DAG, dl),
+ Idx, DAG.getConstant(0, getPointerTy()));
+ SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
+ Perm, DAG.getConstant(0, getPointerTy()));
+ }
+ return SDValue();
+ }
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
- SDValue Idx = Op.getOperand(1);
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
- EVT EltVT = VecVT.getVectorElementType();
+ MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits()/128;
- EVT VT128 = EVT::getVectorVT(*Context,
- OpVT.getVectorElementType(),
+ MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- EVT ResVT = Op.getValueType();
- EVT InVT = In.getValueType();
+ MVT ResVT = Op.getSimpleValueType();
+ MVT InVT = In.getSimpleValueType();
if (Subtarget->hasFp256()) {
if (ResVT.is128BitVector() &&
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if ((Op.getNode()->getValueType(0).is256BitVector() ||
- Op.getNode()->getValueType(0).is512BitVector()) &&
- SubVec.getNode()->getValueType(0).is128BitVector() &&
+ if ((Op.getNode()->getSimpleValueType(0).is256BitVector() ||
+ Op.getNode()->getSimpleValueType(0).is512BitVector()) &&
+ SubVec.getNode()->getSimpleValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
}
- if (Op.getNode()->getValueType(0).is512BitVector() &&
- SubVec.getNode()->getValueType(0).is256BitVector() &&
+ if (Op.getNode()->getSimpleValueType(0).is512BitVector() &&
+ SubVec.getNode()->getSimpleValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- DAG.getIntPtrConstant(0),
- MachinePointerInfo(Ptr),
- false, false, false, 0);
+ SDValue ThreadPointer =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false,
- 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
// Concat upper and lower parts.
//
- if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
+ ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
-SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+/// Lower a vector ZERO_EXTEND with AVX-512 nodes.
+///
+/// Returns an empty SDValue unless the result has 8 or 16 elements. A 512-bit
+/// result whose source elements are not i1 becomes one X86ISD::VZEXT node.
+/// Every other accepted input must have i1 source elements: the constant 1 is
+/// loaded from the constant pool and passed with the source to
+/// X86ISD::VBROADCASTM, typed v8i64 or v16i32, and that node is wrapped in
+/// X86ISD::VTRUNC when the requested type is not 512 bits wide.
+static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT VT = Op->getSimpleValueType(0);
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+ SDLoc DL(Op);
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+ // Now we have only mask extension
+ assert(InVT.getVectorElementType() == MVT::i1 &&
+ "Expected a vector of i1 elements");
+ MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
+ // The node is dereferenced unconditionally, so use the asserting cast<>
+ // instead of dyn_cast<>, which may return null.
+ const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+}
+
+static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
return SDValue();
}
-SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+
+/// Custom-lower a vector ZERO_EXTEND.
+///
+/// 512-bit results and sources with i1 elements are delegated entirely to
+/// LowerZERO_EXTEND_AVX512. Otherwise LowerAVXExtend is tried when
+/// Subtarget->hasFp256() holds, and an empty SDValue is returned if it yields
+/// no node.
+static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
- SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
+ if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
+ return LowerZERO_EXTEND_AVX512(Op, DAG);
+
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
return Res;
}
- if (!VT.is256BitVector() || !SVT.is128BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
- return SDValue();
-
- assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
-
- // AVX2 has better support of integer extending.
- if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
-
- SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
- static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
- SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In,
- DAG.getUNDEF(MVT::v8i16),
- &Mask[0]));
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+ // The 128-bit to 256-bit VZEXT fallback that used to follow is removed, so
+ // an extension of that shape must not reach this point.
+ assert((!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements()) &&
+ "Unexpected 128-bit to 256-bit zero extension");
+ return SDValue();
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getSimpleValueType();
-
- if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ MVT InVT = In.getSimpleValueType();
+ assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
+ "Invalid TRUNCATE operation");
+
+ if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
+ if (VT.getVectorElementType().getSizeInBits() >=8)
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
+ assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
+ if (InVT.getSizeInBits() < 512) {
+ MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
+ In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+ InVT = ExtVT;
+ }
+ SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
+ const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+ SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
+ return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
+ }
+
+ if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
}
- if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+ if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget->hasInt256()) {
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
}
// Handle truncation of V256 to V128 using shuffles.
- if (!VT.is128BitVector() || !SVT.is256BitVector())
+ if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
- assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
- "Invalid op");
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
if (VT.isVector()) {
- MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
unsigned NumOperands = 0;
// Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic intruction before it. Attempt to truncate the operands
+ // and the arithmetic instruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
- SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
-SDValue X86TargetLowering::LowerSIGN_EXTEND_AVX512(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op->getValueType(0);
+// Lower a vector SIGN_EXTEND node for AVX-512. Only result types with 8 or 16
+// elements are handled here; an empty SDValue is returned for anything else.
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
- EVT InVT = In.getValueType();
+ MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
- if (InVT.getVectorElementType().getSizeInBits() >=8 &&
- VT.getVectorElementType().getSizeInBits() >= 32)
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ // A 512-bit result whose source elements are not i1 is emitted directly as
+ // an X86ISD::VSEXT node.
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
- if (InVT.getVectorElementType() == MVT::i1) {
- unsigned int NumElts = InVT.getVectorNumElements();
- assert ((NumElts == 8 || NumElts == 16) &&
- "Unsupported SIGN_EXTEND operation");
- if (VT.getVectorElementType().getSizeInBits() >= 32) {
- Constant *C =
- ConstantInt::get(*DAG.getContext(),
- (NumElts == 8)? APInt(64, ~0ULL): APInt(32, ~0U));
- SDValue CP = DAG.getConstantPool(C, getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- SDValue Ld = DAG.getLoad(VT.getScalarType(), dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
- return DAG.getNode(X86ISD::VBROADCASTM, dl, VT, In, Ld);
- }
- }
- return SDValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Everything that reaches this point is expected to extend an i1 vector.
+ assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+ // Load an all-ones scalar of the 512-bit element type (i64 for 8 elements,
+ // i32 for 16) from the constant pool and emit X86ISD::VBROADCASTM with the
+ // i1 vector as the first operand and the loaded scalar as the second.
+ MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+ Constant *C = ConstantInt::get(*DAG.getContext(),
+ APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
+ // A 512-bit request is satisfied by the broadcast itself; any other result
+ // type is obtained by narrowing the broadcast with X86ISD::VTRUNC.
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
}
-SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
return LowerSIGN_EXTEND_AVX512(Op, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
- (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
+ (VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget->hasInt256())
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- // FIXME: Ensure alignment here
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- Flag = Chain.getValue(1);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
- Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- SPTy).getValue(1);
+ unsigned SPReg = RegInfo->getStackRegister();
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
+ Chain = SP.getValue(1);
- SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ if (Align) {
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
+ }
+
+ SDValue Ops1[2] = { SP, Chain };
return DAG.getMergeValues(Ops1, 2, dl);
}
}
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftByConstNode - Handle vector element shifts where the shift
+// amount is a constant. Takes immediate version of shift as input.
+//
+// Opc must be X86ISD::VSHLI, X86ISD::VSRLI or X86ISD::VSRAI. A shift amount
+// that is at least the element width yields a zero constant for the two
+// logical shifts and is clamped to (element width - 1) for VSRAI. Otherwise
+// the result is an Opc node of type VT over SrcOp and the constant amount.
+static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT,
+                                          SDValue SrcOp, uint64_t ShiftAmt,
+                                          SelectionDAG &DAG) {
+  // Validate the opcode before anything else so that an unexpected node is
+  // caught even when the out-of-range path below returns early.
+  assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
+         && "Unknown target vector shift-by-constant node");
+
+  const unsigned EltBits = VT.getVectorElementType().getSizeInBits();
+
+  // Check for ShiftAmt >= element width
+  if (ShiftAmt >= EltBits) {
+    // The logical shifts produce an all-zero vector in this case.
+    if (Opc != X86ISD::VSRAI)
+      return DAG.getConstant(0, VT);
+    // The arithmetic shift is clamped to the largest in-range amount.
+    ShiftAmt = EltBits - 1;
+  }
+
+  // NOTE(review): the call sites this helper replaces built the immediate as
+  // MVT::i32; confirm the instruction patterns accept an i8 immediate here.
+  return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
+}
+
// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
- if (isa<ConstantSDNode>(ShAmt)) {
- // Constant may be a TargetConstant. Use a regular constant.
- uint32_t ShiftAmt = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI:
- case X86ISD::VSRLI:
- case X86ISD::VSRAI:
- return DAG.getNode(Opc, dl, VT, SrcOp,
- DAG.getConstant(ShiftAmt, MVT::i32));
- }
- }
+ // Catch shift-by-constant.
+ if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
+ return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
+ CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
+ case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256: {
+ case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
- // but second operand for node/intruction.
+ // but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case Intrinsic::x86_avx512_kortestz:
+ case Intrinsic::x86_avx512_kortestc: {
+ unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B;
+ SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
// SSE/AVX shift intrinsics
case Intrinsic::x86_sse2_psll_w:
}
}
-static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
+// Build the machine node Opc for a gather intrinsic that carries no explicit
+// mask or pass-through operand.
+//
+// The node is given a zero vector of Op's value type as its source operand
+// and a constant ~0 mask with one i1 element per Index element. ScaleOp must
+// be a constant; it is emitted as an i8 target constant. Returns the merged
+// (data, chain) values of the machine node; its mask result is not returned.
+// Returns an empty SDValue when ScaleOp is not a constant.
+static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+                             SDValue Base, SDValue Index,
+                             SDValue ScaleOp, SDValue Chain,
+                             const X86Subtarget * Subtarget) {
+  SDLoc dl(Op);
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+  // The scale is encoded as an immediate. Bail out instead of dereferencing
+  // a null pointer in builds where asserts are compiled out.
+  if (!C)
+    return SDValue();
+  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+  SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+  EVT MaskVT = MVT::getVectorVT(MVT::i1,
+                                Index.getValueType().getVectorNumElements());
+  SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+  // Results: 0 = gathered data, 1 = mask, 2 = chain.
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+  SDValue Segment = DAG.getRegister(0, MVT::i32);
+  SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+  return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+}
+
+// Build the machine node Opc for a masked gather intrinsic.
+//
+// Mask is bitcast to a vector of i1 with one element per Index element. Src
+// is the source operand of the node; an UNDEF Src is replaced by a zero
+// vector of Op's value type. ScaleOp must be a constant; it is emitted as an
+// i8 target constant. Returns the merged (data, chain) values of the machine
+// node; its mask result is not returned. Returns an empty SDValue when
+// ScaleOp is not a constant.
+static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+                              SDValue Src, SDValue Mask, SDValue Base,
+                              SDValue Index, SDValue ScaleOp, SDValue Chain,
+                              const X86Subtarget * Subtarget) {
+  SDLoc dl(Op);
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+  // The scale is encoded as an immediate. Bail out instead of dereferencing
+  // a null pointer in builds where asserts are compiled out.
+  if (!C)
+    return SDValue();
+  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+  EVT MaskVT = MVT::getVectorVT(MVT::i1,
+                                Index.getValueType().getVectorNumElements());
+  SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+  // Results: 0 = gathered data, 1 = mask, 2 = chain.
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+  SDValue Segment = DAG.getRegister(0, MVT::i32);
+  if (Src.getOpcode() == ISD::UNDEF)
+    Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+  SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+  return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+}
+
+// Build the machine node Opc for a scatter intrinsic that carries no explicit
+// mask operand.
+//
+// The node is given a constant ~0 mask with one i1 element per Index element.
+// ScaleOp must be a constant; it is emitted as an i8 target constant. Returns
+// the chain result of the machine node, or an empty SDValue when ScaleOp is
+// not a constant.
+static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+                              SDValue Src, SDValue Base, SDValue Index,
+                              SDValue ScaleOp, SDValue Chain) {
+  SDLoc dl(Op);
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+  // The scale is encoded as an immediate. Bail out instead of dereferencing
+  // a null pointer in builds where asserts are compiled out.
+  if (!C)
+    return SDValue();
+  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+  SDValue Segment = DAG.getRegister(0, MVT::i32);
+  EVT MaskVT = MVT::getVectorVT(MVT::i1,
+                                Index.getValueType().getVectorNumElements());
+  SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+  // Results: 0 = mask, 1 = chain.
+  SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+  SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
+  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+  return SDValue(Res, 1);
+}
+
+// Build the machine node Opc for a masked scatter intrinsic.
+//
+// Mask is bitcast to a vector of i1 with one element per Index element.
+// ScaleOp must be a constant; it is emitted as an i8 target constant. Returns
+// the chain result of the machine node, or an empty SDValue when ScaleOp is
+// not a constant.
+static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+                               SDValue Src, SDValue Mask, SDValue Base,
+                               SDValue Index, SDValue ScaleOp, SDValue Chain) {
+  SDLoc dl(Op);
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+  // The scale is encoded as an immediate. Bail out instead of dereferencing
+  // a null pointer in builds where asserts are compiled out.
+  if (!C)
+    return SDValue();
+  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+  SDValue Segment = DAG.getRegister(0, MVT::i32);
+  EVT MaskVT = MVT::getVectorVT(MVT::i1,
+                                Index.getValueType().getVectorNumElements());
+  SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+  // Results: 0 = mask, 1 = chain.
+  SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+  SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
+  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+  return SDValue(Res, 1);
+}
+
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
-
+ //int_gather(index, base, scale);
+ case Intrinsic::x86_avx512_gather_qpd_512:
+ case Intrinsic::x86_avx512_gather_qps_512:
+ case Intrinsic::x86_avx512_gather_dpd_512:
+ case Intrinsic::x86_avx512_gather_qpi_512:
+ case Intrinsic::x86_avx512_gather_qpq_512:
+ case Intrinsic::x86_avx512_gather_dpq_512:
+ case Intrinsic::x86_avx512_gather_dps_512:
+ case Intrinsic::x86_avx512_gather_dpi_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Index = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
+ SDValue Scale = Op.getOperand(4);
+ return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget);
+ }
+ //int_gather_mask(v1, mask, index, base, scale);
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ case Intrinsic::x86_avx512_gather_dpq_mask_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_mask_512:
+ Opc = X86::VPGATHERDQZrm; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Base = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ Subtarget);
+ }
+ //int_scatter(base, index, v1, scale);
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ case Intrinsic::x86_avx512_scatter_dpi_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_512:
+ Opc = X86::VPSCATTERDDZmr; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Base = Op.getOperand(2);
+ SDValue Index = Op.getOperand(3);
+ SDValue Src = Op.getOperand(4);
+ SDValue Scale = Op.getOperand(5);
+ return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain);
+ }
+ //int_scatter_mask(base, mask, index, v1, scale);
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ Opc = X86::VPSCATTERDDZmr; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Base = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Src = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+ }
// XTEST intrinsics.
case Intrinsic::x86_xtest: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
- assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
- "Only know how to lower V2I64/V4I64 multiply");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
+ "Only know how to lower V2I64/V4I64/V8I64 multiply");
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
- SDValue ShAmt = DAG.getConstant(32, MVT::i32);
-
- SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
- SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+ SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
+ SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
// Bit cast to 32-bit vectors for MULUDQ
- EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
+ (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
- AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
- AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
if ((SplatValue != 0) &&
(SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
- unsigned lg2 = SplatValue.countTrailingZeros();
+ unsigned Lg2 = SplatValue.countTrailingZeros();
// Splat the sign bit.
- SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
- SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ SmallVector<SDValue, 16> Sz(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - 1,
+ EltTy));
+ SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
+ NumElts));
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
- SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SmallVector<SDValue, 16> Amt(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - Lg2,
+ EltTy));
+ SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
+ NumElts));
SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
- SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
- SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+ SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
+ SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
+ NumElts));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
- (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
+ (Subtarget->hasAVX512() &&
+ (VT == MVT::v8i64 || VT == MVT::v16i32))) {
if (Op.getOpcode() == ISD::SHL)
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRL)
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16,
if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(32,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(32,
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRL:
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRA:
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
}
VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
- VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ VT == MVT::v8i32 || VT == MVT::v16i16)) ||
+ (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRA:
case MVT::v8i16:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRL:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget->is64Bit() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
+ (Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
if (V.getNode())
return V;
+ if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
+ return Op;
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(4, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(2, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
- Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
- return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
+ DAG);
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+ DAG);
}
}
}
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
- case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
- case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
- case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
+ case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
+ case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
+ case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
+ case X86ISD::BZHI: return "X86ISD::BZHI";
+ case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::TESTM: return "X86ISD::TESTM";
+ case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
- isSHUFPMask(M, SVT, Subtarget->hasFp256()) ||
+ isSHUFPMask(M, SVT) ||
isPSHUFDMask(M, SVT) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
if (NumElts == 4 && SVT.is128BitVector()) {
return (isMOVLMask(Mask, SVT) ||
isCommutedMOVLMask(Mask, SVT, true) ||
- isSHUFPMask(Mask, SVT, Subtarget->hasFp256()) ||
- isSHUFPMask(Mask, SVT, Subtarget->hasFp256(), /* Commuted */ true));
+ isSHUFPMask(Mask, SVT) ||
+ isSHUFPMask(Mask, SVT, /* Commuted */ true));
}
return false;
}
case X86::CMOV_V8F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
+ case X86::CMOV_V16F32:
+ case X86::CMOV_V8F64:
+ case X86::CMOV_V8I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
}
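-/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
+/// \brief Matches a VSELECT onto min/max. Returns a pair (Opcode, NeedSplit):
+/// Opcode is 0 if the node doesn't match, and NeedSplit is set for 256-bit
+/// integer vectors when AVX2 is not available.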
-static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
- SDValue RHS, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+static std::pair<unsigned, bool>
+matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const X86Subtarget *Subtarget) {
if (!VT.isVector())
- return 0;
+ return std::make_pair(0, false);
+ bool NeedSplit = false;
switch (VT.getSimpleVT().SimpleTy) {
- default: return 0;
+ default: return std::make_pair(0, false);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
if (!Subtarget->hasAVX2())
- return 0;
+ NeedSplit = true;
+ if (!Subtarget->hasAVX())
+ return std::make_pair(0, false);
+ break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
if (!Subtarget->hasSSE2())
- return 0;
+ return std::make_pair(0, false);
}
// SSE2 has only a small subset of the operations.
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ unsigned Opc = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
default: break;
case ISD::SETULT:
case ISD::SETULE:
- return hasUnsigned ? X86ISD::UMIN : 0;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- return hasUnsigned ? X86ISD::UMAX : 0;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- return hasSigned ? X86ISD::SMIN : 0;
+ Opc = hasSigned ? X86ISD::SMIN : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- return hasSigned ? X86ISD::SMAX : 0;
+ Opc = hasSigned ? X86ISD::SMAX : 0; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
default: break;
case ISD::SETULT:
case ISD::SETULE:
- return hasUnsigned ? X86ISD::UMAX : 0;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- return hasUnsigned ? X86ISD::UMIN : 0;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- return hasSigned ? X86ISD::SMAX : 0;
+ Opc = hasSigned ? X86ISD::SMAX : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- return hasSigned ? X86ISD::SMIN : 0;
+ Opc = hasSigned ? X86ISD::SMIN : 0; break;
}
}
- return 0;
+ return std::make_pair(Opc, NeedSplit);
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ VT != MVT::f80 && TLI.isTypeLegal(VT) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
+ EVT CondVT = Cond.getValueType();
+ if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
+ CondVT.getVectorElementType() == MVT::i1) {
+ // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+ // lowering on AVX-512. In this case we convert it to
+ // v16i8 (select v16i8, v16i8, v16i8) and use an AVX instruction.
+ // The same applies to all 128- and 256-bit vectors of i8 and i16.
+ EVT OpVT = LHS.getValueType();
+ if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
+ (OpVT.getVectorElementType() == MVT::i8 ||
+ OpVT.getVectorElementType() == MVT::i16)) {
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
+ DCI.AddToWorklist(Cond.getNode());
+ return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
+ }
+ }
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
}
}
+ // Early exit: skip the remaining combines when VT is not a legal type.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
// Match VSELECTs into subs with unsigned saturation.
- if (!DCI.isBeforeLegalize() &&
- N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
}
// Try to match a min/max vector operation.
- if (!DCI.isBeforeLegalize() &&
- N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
- if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
- return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
+ std::pair<unsigned, bool> ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
+ unsigned Opc = ret.first;
+ bool NeedSplit = ret.second;
+
+ if (Opc && NeedSplit) {
+ unsigned NumElems = VT.getVectorNumElements();
+ // Extract the LHS vectors
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
+
+ // Extract the RHS vectors
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
+
+ // Create min/max for each subvector
+ LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
+ RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
+
+ // Merge the result
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
+ } else if (Opc)
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
- if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
- Cond.getOpcode() == ISD::SETCC) {
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // Check if SETCC has already been promoted
+ TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
// to simplify previous instructions.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
if (R.getNode())
return R;
- // Create BLSI, and BLSR instructions
+ // Create BLSI, BLSR, BZHI, and BEXTR instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
- if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ // BZHI is X & ((1 << Y) - 1)
+ // BEXTR is ((X >> imm) & (2**size-1))
+ if (VT == MVT::i32 || VT == MVT::i64) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- // Check LHS for neg
- if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
- isZero(N0.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
-
- // Check RHS for neg
- if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
- isZero(N1.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+ if (Subtarget->hasBMI()) {
+ // Check LHS for neg
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+ isZero(N0.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+
+ // Check RHS for neg
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+ isZero(N1.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+ // Check LHS for X-1
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+ // Check RHS for X-1
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+ }
+
+ if (Subtarget->hasBMI2()) {
+ // Check for (and (add (shl 1, Y), -1), X)
+ if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SHL) {
+ SDValue N001 = N00.getOperand(1);
+ assert(N001.getValueType() == MVT::i8 && "unexpected type");
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
+ if (C && C->getZExtValue() == 1)
+ return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001);
+ }
+ }
- // Check LHS for X-1
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
- isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+ // Check for (and X, (add (shl 1, Y), -1))
+ if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::SHL) {
+ SDValue N101 = N10.getOperand(1);
+ assert(N101.getValueType() == MVT::i8 && "unexpected type");
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
+ if (C && C->getZExtValue() == 1)
+ return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101);
+ }
+ }
+ }
- // Check RHS for X-1
- if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
- isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+ // Check for BEXTR.
+ if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
+ (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
+ ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (MaskNode && ShiftNode) {
+ uint64_t Mask = MaskNode->getZExtValue();
+ uint64_t Shift = ShiftNode->getZExtValue();
+ if (isMask_64(Mask)) {
+ uint64_t MaskSize = CountPopulation_64(Mask);
+ if (Shift + MaskSize <= VT.getSizeInBits())
+ return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Shift | (MaskSize << 8), VT));
+ }
+ }
+ } // BEXTR
return SDValue();
}
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
- !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ VT == MVT::i64) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));