setPrefFunctionAlignment(4); // 2^4 bytes.
- InitIntrinsicTables();
+ verifyIntrinsicTables();
}
// This has so far only been implemented for 64-bit MachO.
return true;
}
-/// \brief Check whether all of one set of inputs to a shuffle mask are in place.
-///
-/// Mask entries pointing at the other input or undef (-1) will be skipped.
-static bool isShuffleMaskInputInPlace(ArrayRef<int> Mask, bool LoInput = true) {
- int Size = Mask.size();
- for (int i = 0; i < Size; ++i) {
- int M = Mask[i];
- if (M == -1 || (LoInput && M >= 4) || (!LoInput && M < 4)) // NOTE(review): the literal 4 agrees with Size only for 4-element masks
- continue;
- if (M - (LoInput ? 0 : Size) != i) // entries for the high input are offset by Size before comparing to the lane index
- return false;
- }
- return true;
-}
-
// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
// 2013 will allow us to use it as a non-type template parameter.
namespace {
// INSERTPS when the V1 elements are already in the correct locations
// because otherwise we can just always use two SHUFPS instructions which
// are much smaller to encode than a SHUFPS and an INSERTPS.
- if (Subtarget->hasSSE41() &&
- isShuffleMaskInputInPlace(Mask, /*LoInput*/ true)) {
- // Insert the V2 element into the desired position.
- SDValue InsertPSMask =
- DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4);
- return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
- InsertPSMask);
+ if (Subtarget->hasSSE41()) {
+ // When using INSERTPS we can zero any lane of the destination. Collect
+ // the zero inputs into a mask and drop them from the lanes of V1 which
+ // actually need to be present as inputs to the INSERTPS.
+ unsigned ZMask = 0;
+ if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ ZMask = 0xF ^ (1 << V2Index);
+ } else if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+ for (int i = 0; i < 4; ++i) {
+ int M = Mask[i];
+ if (M >= 4)
+ continue;
+ if (M > -1) {
+ SDValue Input = V1.getOperand(M);
+ if (Input.getOpcode() != ISD::UNDEF &&
+ !X86::isZeroNode(Input)) {
+ // A non-zero input!
+ ZMask = 0;
+ break;
+ }
+ }
+ ZMask |= 1 << i;
+ }
+ }
+
+ // Synthesize a shuffle mask for the non-zero and non-v2 inputs.
+ int InsertShuffleMask[4] = {-1, -1, -1, -1};
+ for (int i = 0; i < 4; ++i)
+ if (i != V2Index && (ZMask & (1 << i)) == 0)
+ InsertShuffleMask[i] = Mask[i];
+
+ if (isNoopShuffleMask(InsertShuffleMask)) {
+ // Replace V1 with undef if nothing from V1 survives the INSERTPS.
+ if ((ZMask | 1 << V2Index) == 0xF)
+ V1 = DAG.getUNDEF(MVT::v4f32);
+
+ // Insert the V2 element into the desired position.
+ SDValue InsertPSMask =
+ DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4 | ZMask);
+ return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+ InsertPSMask);
+ }
}
// Compute the index adjacent to V2Index and in the same half by toggling
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- const IntrinsicData* IntrData = GetIntrinsicWithoutChain(IntNo);
+ const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
switch(IntrData->Type) {
case INTR_TYPE_1OP:
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- const IntrinsicData* IntrData = GetIntrinsicWithChain(IntNo);
+ const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData)
return SDValue();