}
}
-/// \brief Tiny helper function to test whether adjacent masks are sequential.
-static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
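+/// \brief Tiny helper function to test whether a shuffle mask could be
+/// simplified by widening the elements being shuffled.
+///
+/// Returns true only when, for every adjacent pair of mask entries, the first
+/// entry is even and the second entry is its immediate successor, so that each
+/// pair selects exactly one element of twice the width. The mask is walked two
+/// entries at a time, so it must contain an even number of entries.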
+static bool canWidenShuffleElements(ArrayRef<int> Mask) {
for (int i = 0, Size = Mask.size(); i < Size; i += 2)
- if (Mask[i] + 1 != Mask[i+1])
+ if (Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i+1])
return false;
return true;
// but it might be interesting to form i128 integers to handle flipping the
// low and high halves of AVX 256-bit vectors.
if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
- areAdjacentMasksSequential(Mask)) {
+ canWidenShuffleElements(Mask)) {
SmallVector<int, 8> NewMask;
for (int i = 0, Size = Mask.size(); i < Size; i += 2)
NewMask.push_back(Mask[i] / 2);
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations.
- if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
- areAdjacentMasksSequential(Mask)) {
+ if (canWidenShuffleElements(Mask)) {
int DMask[] = {-1, -1, -1, -1};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + Mask[0] / 2;
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
ret <4 x i32> %shuffle
}
+; Mask <0,1,1,2>: both adjacent pairs are sequential, but the second pair
+; starts at an odd index, so the shuffle cannot be treated as a shuffle of
+; wider elements. It is expected to lower to a single pshufd on SSE2.
+define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SSE2-LABEL: @shuffle_v4i32_0112
+; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,1,2]
+; CHECK-SSE2-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
+ ret <4 x i32> %shuffle
+}
define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
; CHECK-SSE2-LABEL: @shuffle_v4i32_0300
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]