// value of the first element.
// E.g. <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <IDX, IDX+1, 0, 1, ...>. IDX is
- // equal to the ExpectedElt. For this case, ExpectedElt is (NumElts*2 - 2).
+ // equal to the ExpectedElt.
Imm = (M[0] >= 0) ? static_cast<unsigned>(M[0]) : ExpectedElt.getZExtValue();
- // Adjust the index value if the source operands will be swapped.
- if (Imm >= NumElts) {
+ // If there are no leading UNDEFs, swap the operands when M[0] >= NumElts.
+ if (M[0] >= 0 && Imm >= NumElts) {
ReverseEXT = true;
Imm -= NumElts;
+ } else if (M[0] < 0) {
+ // Only swap the operands when the number of leading UNDEFs exceeds the
+ // value of the first real element.
+ if (*FirstRealElt < FirstRealElt - M.begin())
+ ReverseEXT = true;
+ if (Imm >= NumElts)
+ Imm -= NumElts;
}
return true;
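
For reference, a minimal standalone sketch of the index computation above
(the name computeEXTImm is hypothetical, and plain unsigned modulo arithmetic
stands in for the APInt wrap-around used in tree; UNDEF lanes are encoded as
-1, as in a shuffle mask):

#include <cstdio>
#include <vector>

static bool computeEXTImm(const std::vector<int> &M, unsigned NumElts,
                          bool &ReverseEXT, unsigned &Imm) {
  ReverseEXT = false;
  const unsigned Wrap = 2 * NumElts; // indices span both source operands

  // Find the first non-UNDEF element.
  unsigned FirstRealIdx = 0;
  while (FirstRealIdx < M.size() && M[FirstRealIdx] < 0)
    ++FirstRealIdx;
  if (FirstRealIdx == M.size())
    return false; // all-UNDEF mask

  // Every later non-UNDEF lane must continue the run started there.
  unsigned Expected = (static_cast<unsigned>(M[FirstRealIdx]) + 1) % Wrap;
  for (unsigned I = FirstRealIdx + 1; I < M.size(); ++I) {
    if (M[I] >= 0 && static_cast<unsigned>(M[I]) != Expected)
      return false;
    Expected = (Expected + 1) % Wrap;
  }

  // After scanning the whole NumElts-entry mask, Expected equals the start
  // lane implied by the leading UNDEFs plus NumElts, modulo 2*NumElts.
  Imm = (M[0] >= 0) ? static_cast<unsigned>(M[0]) : Expected;

  if (M[0] >= 0 && Imm >= NumElts) {
    // No leading UNDEFs: the run starts in the second operand, so swap.
    ReverseEXT = true;
    Imm -= NumElts;
  } else if (M[0] < 0) {
    // Leading UNDEFs: swap only when their count exceeds the first real
    // element, i.e. the implied start lane falls in the second operand.
    if (static_cast<unsigned>(M[FirstRealIdx]) < FirstRealIdx)
      ReverseEXT = true;
    if (Imm >= NumElts)
      Imm -= NumElts;
  }
  return true;
}

int main() {
  // Mask of test_vext_undef_traverse2 below: five leading UNDEFs, then the
  // run 0, 1, 2. The implied start lane is (0 - 5) mod 16 = 11, which falls
  // in the second operand, so the operands are swapped and the immediate
  // ends up as 11 mod 8 = 3, i.e. byte offset #6 for i16 lanes.
  std::vector<int> M = {-1, -1, -1, -1, -1, 0, 1, 2};
  bool ReverseEXT;
  unsigned Imm;
  if (computeEXTImm(M, 8, ReverseEXT, Imm))
    std::printf("ReverseEXT=%d Imm=%u\n", ReverseEXT, Imm); // prints 1 and 3
  return 0;
}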
--- /dev/null
+; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+
+; The following 3 test cases test shufflevector masks with leading UNDEFs.
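+; In each case the expected ext byte immediate is the implied start lane of
+; the element run, modulo the lane count, times the lane size in bytes.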
+define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) {
+;CHECK-LABEL: test_vext_undef_traverse:
+;CHECK: {{ext.16b.*v0, #4}}
+ %vext = shufflevector <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0>, <8 x i16> %in, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9>
+ ret <8 x i16> %vext
+}
+
+define <8 x i16> @test_vext_undef_traverse2(<8 x i16> %in) {
+;CHECK-LABEL: test_vext_undef_traverse2:
+;CHECK: {{ext.16b.*v0, #6}}
+ %vext = shufflevector <8 x i16> %in, <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2>
+ ret <8 x i16> %vext
+}
+
+define <8 x i8> @test_vext_undef_traverse3(<8 x i8> %in) {
+;CHECK-LABEL: test_vext_undef_traverse3:
+;CHECK: {{ext.8b.*v0, #6}}
+ %vext = shufflevector <8 x i8> %in, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
+ ret <8 x i8> %vext
+}