return true;
}
+/// \brief Check whether all elements of one shuffle input are already in place.
+///
+/// An entry is "in place" when it selects, from the input being checked, the
+/// element whose index equals the entry's own position in the mask. Mask
+/// entries pointing at the other input or undef (-1) will be skipped.
+///
+/// \param Mask    The shuffle mask. Entries in [0, Size) select from the low
+///                input and entries at or above Size from the high input,
+///                where Size is the number of mask entries.
+/// \param LoInput If true, check the low input's entries; otherwise check the
+///                high input's entries.
+/// \returns true if no checked entry would have to move, false otherwise.
+static bool isShuffleMaskInputInPlace(ArrayRef<int> Mask, bool LoInput = true) {
+  int Size = Mask.size();
+  for (int i = 0; i < Size; ++i) {
+    int M = Mask[i];
+    // Split the two inputs at the mask width rather than a hard-coded 4 so
+    // that this test agrees with the Size-based rebasing below.
+    if (M == -1 || (LoInput && M >= Size) || (!LoInput && M < Size))
+      continue;
+    // Rebase high-input entries to [0, Size) before comparing to the slot.
+    if (M - (LoInput ? 0 : Size) != i)
+      return false;
+  }
+  return true;
+}
+
// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
// 2013 will allow us to use it as a non-type template parameter.
namespace {
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
Mask.begin();
+
+ // Check for whether we can use INSERTPS to perform the blend. We only use
+ // INSERTPS when the V1 elements are already in the correct locations
+ // because otherwise we can just always use two SHUFPS instructions which
+ // are much smaller to encode than a SHUFPS and an INSERTPS.
+ if (Subtarget->hasSSE41() &&
+ isShuffleMaskInputInPlace(Mask, /*LoInput*/ true)) {
+ // Insert the V2 element into the desired position.
+ SDValue InsertPSMask =
+ DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4);
+ return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+ InsertPSMask);
+ }
+
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
int V2AdjIndex = V2Index ^ 1;
}
define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0124
-; ALL: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
-; ALL-NEXT: retq
+; The mask <0, 1, 2, 4> keeps elements 0-2 of %a and puts element 0 of %b last.
+; SSE2 expects two shufps; SSE4.1 and AVX1 expect a single (v)insertps.
+; SSE2-LABEL: @shuffle_v4i32_0124
+; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_0124
+; SSE41: insertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_0124
+; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX1-NEXT: retq
 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ret <4 x i32> %shuffle
}