default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
- RRC = Subtarget->is64Bit() ?
- (const TargetRegisterClass*)&X86::GR64RegClass :
- (const TargetRegisterClass*)&X86::GR32RegClass;
+ RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
+ // If we are returning more than one value, we can definitely
+ // not make a tail call see PR19530
+ if (UI->getNumOperands() > 4)
+ return false;
+ if (UI->getNumOperands() == 4 &&
+ UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
+ return false;
HasRet = true;
}
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
+ CCInfo.AlignStack(Is64Bit ? 8 : 4);
unsigned LastVal = ~0U;
SDValue ArgValue;
return true;
}
+// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
+// 2013 will allow us to use it as a non-type template parameter.
+namespace {
+
+/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
+///
+/// See its documentation for details.
+bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
+ if (Mask.size() != Args.size())
+ return false;
+ for (int i = 0, e = Mask.size(); i < e; ++i) {
+ assert(*Args[i] >= 0 && "Arguments must be positive integers!");
+ assert(*Args[i] < (int)Args.size() * 2 &&
+ "Argument outside the range of possible shuffle inputs!");
+ if (Mask[i] != -1 && Mask[i] != *Args[i])
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
+/// arguments.
+///
+/// This is a fast way to test a shuffle mask against a fixed pattern:
+///
+/// if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
+///
+/// It returns true if the mask is exactly as wide as the argument list, and
+/// each element of the mask is either -1 (signifying undef) or the value given
+/// in the argument.
+static const VariadicFunction1<
+ bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
}
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
if (NumV2Elements == 1) {
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would encur if we
DAG.getConstant(VPERMILPMask, MVT::i8));
}
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
+ // FIXME: It would be nice to find a way to get canonicalization to commute
+ // these patterns.
+ if (isShuffleEquivalent(Mask, 4, 0, 6, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
+ if (isShuffleEquivalent(Mask, 5, 1, 7, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
+
// Check if the blend happens to exactly fit that of SHUFPD.
if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) &&
Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) {
for (int i = 0; i < 4; ++i)
if (Mask[i] >= 0 && Mask[i] < 4)
V1Mask[i] = Mask[i];
- else if (Mask[i] >= 4)
- V2Mask[i] = Mask[i] - 4;
+ else if (Mask[i] >= 4)
+ V2Mask[i] = Mask[i] - 4;
V1 = DAG.getVectorShuffle(MVT::v4f64, DL, V1, DAG.getUNDEF(MVT::v4f64), V1Mask);
V2 = DAG.getVectorShuffle(MVT::v4f64, DL, V2, DAG.getUNDEF(MVT::v4f64), V2Mask);
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ // A vselect where all conditions and data are constants can be optimized into
+ // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
+ return SDValue();
+
SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG);
if (BlendOp.getNode())
return BlendOp;
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
unsigned Opc;
- if (VT == MVT::v8i16)
+ if (VT == MVT::v8i16) {
Opc = X86ISD::PINSRW;
- else if (VT == MVT::v16i8)
- Opc = X86ISD::PINSRB;
- else
+ } else {
+ assert(VT == MVT::v16i8);
Opc = X86ISD::PINSRB;
+ }
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
// argument.
if (VT == MVT::i1) {
assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
"Invalid scalar TRUNCATE operation");
- if (InVT == MVT::i32)
+ if (InVT.getSizeInBits() >= 32)
return SDValue();
- if (InVT.getSizeInBits() == 64)
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::i32, In);
- else if (InVT.getSizeInBits() < 32)
- In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
+ In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
}
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
}
enum IntrinsicType {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX
};
struct IntrinsicData {
IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_addcarryx_u32,
+ IntrinsicData(ADX, X86ISD::ADC, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_addcarryx_u64,
+ IntrinsicData(ADX, X86ISD::ADC, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_addcarry_u32,
+ IntrinsicData(ADX, X86ISD::ADC, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_addcarry_u64,
+ IntrinsicData(ADX, X86ISD::ADC, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_subborrow_u32,
+ IntrinsicData(ADX, X86ISD::SBB, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_subborrow_u64,
+ IntrinsicData(ADX, X86ISD::SBB, 0)));
Initialized = true;
}
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
+ // ADC/ADCX/SBB
+ case ADX: {
+ SmallVector<SDValue, 2> Results;
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
+ DAG.getConstant(-1, MVT::i8));
+ SDValue Res = DAG.getNode(Intr.Opc0, dl, VTs, Op.getOperand(3),
+ Op.getOperand(4), GenCF.getValue(1));
+ SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
+ Op.getOperand(5), MachinePointerInfo(),
+ false, false, 0);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_B, MVT::i8),
+ Res.getValue(1));
+ Results.push_back(SetCC);
+ Results.push_back(Store);
+ return DAG.getMergeValues(Results, dl);
+ }
}
llvm_unreachable("Unknown Intrinsic Type");
}
if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
+ // A vselect where all conditions and data are constants can be optimized into
+ // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
+ if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
+ return SDValue();
+
unsigned MaskValue = 0;
if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
return SDValue();