Summary: This patch correctly handles the undef case of an EXTRACT_VECTOR_ELT node whose element index is a constant that is not less than the number of vector elements (i.e. an out-of-bounds index).
Test Plan:
A CodeGen test for X86 is included.
One incorrect regression test is also fixed.
Reviewers: qcolombet, chandlerc, hfinkel
Reviewed By: hfinkel
Subscribers: hfinkel, llvm-commits
Differential Revision: http://reviews.llvm.org/D9250
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236584 91177308-0d34-0410-b5e6-96231b3b80d8
if (N1.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
+ if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
// expanding copies of large vectors from registers.
if (N2C &&
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
-define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) {
+; CHECK-LABEL: extract_i8
+define void @extract_i8(i8* nocapture %dst, <16 x i8> %foo) {
; AVX: vpextrb
; SSE41: pextrb
; AVX-NOT: movb
ret void
}
-define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) {
+; CHECK-LABEL: extract_i16
+define void @extract_i16(i16* nocapture %dst, <8 x i16> %foo) {
; AVX: vpextrw
; SSE41: pextrw
; AVX-NOT: movw
; SSE41-NOT: movw
- %vecext = extractelement <8 x i16> %foo, i32 15
+ %vecext = extractelement <8 x i16> %foo, i32 7
+ store i16 %vecext, i16* %dst, align 1
+ ret void
+}
+
+; Index 16 is out of bounds for <16 x i8>, so the extract folds to undef and
+; neither a byte extract nor a byte store should be emitted.
+; The label uses the SSE41 and AVX prefixes because the RUN lines only enable
+; those; a label under the default prefix would be silently ignored, leaving
+; the negative checks below unscoped.
+; SSE41-LABEL: extract_i8_undef:
+; AVX-LABEL: extract_i8_undef:
+define void @extract_i8_undef(i8* nocapture %dst, <16 x i8> %foo) {
+; AVX-NOT: vpextrb
+; SSE41-NOT: pextrb
+; AVX-NOT: movb
+; SSE41-NOT: movb
+ %vecext = extractelement <16 x i8> %foo, i32 16 ; undef
+ store i8 %vecext, i8* %dst, align 1
+ ret void
+}
+
+; Index 9 is out of bounds for <8 x i16>, so the extract folds to undef and
+; neither a word extract nor a word store should be emitted.
+; The label uses the SSE41 and AVX prefixes because the RUN lines only enable
+; those; a label under the default prefix would be silently ignored, leaving
+; the negative checks below unscoped.
+; SSE41-LABEL: extract_i16_undef:
+; AVX-LABEL: extract_i16_undef:
+define void @extract_i16_undef(i16* nocapture %dst, <8 x i16> %foo) {
+; AVX-NOT: vpextrw
+; SSE41-NOT: pextrw
+; AVX-NOT: movw
+; SSE41-NOT: movw
+ %vecext = extractelement <8 x i16> %foo, i32 9 ; undef
store i16 %vecext, i16* %dst, align 1
ret void
}
--- /dev/null
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
+
+
+; In-bounds extract of element 1 from <32 x i8> using an i256 constant index.
+; Labels are spelled with the X64 and AVX prefixes because those are the only
+; prefixes the RUN lines enable; a default-prefix label would never be checked.
+; X64-LABEL: extractelement_index_1:
+; AVX-LABEL: extractelement_index_1:
+define i8 @extractelement_index_1(<32 x i8> %a) nounwind {
+ ; X64: movaps
+ ; AVX: vpextrb $1
+ %b = extractelement <32 x i8> %a, i256 1
+ ret i8 %b
+}
+
+; In-bounds extract of the last element (index 7) from <8 x i32>.
+; Labels are spelled with the X64 and AVX prefixes because those are the only
+; prefixes the RUN lines enable; a default-prefix label would never be checked.
+; X64-LABEL: extractelement_index_2:
+; AVX-LABEL: extractelement_index_2:
+define i32 @extractelement_index_2(<8 x i32> %a) nounwind {
+ ; X64: pshufd
+ ; AVX: vextractf128 $1
+ ; AVX-NEXT: vpextrd $3
+ %b = extractelement <8 x i32> %a, i64 7
+ ret i32 %b
+}
+
+; Index 15 is out of bounds for <8 x i32>, so the result is undef and no
+; element-extract instruction should be emitted.
+; The label and negative checks use the X64 and AVX prefixes because those are
+; the only prefixes the RUN lines enable; default-prefix directives would be
+; ignored and this test would verify nothing.
+; X64-LABEL: extractelement_index_3:
+; AVX-LABEL: extractelement_index_3:
+define i32 @extractelement_index_3(<8 x i32> %a) nounwind {
+ ; X64-NOT: pextr
+ ; AVX-NOT: pextr
+ %b = extractelement <8 x i32> %a, i64 15
+ ret i32 %b
+}
+
+; In-bounds extract of element 4 from <8 x i32> using an i256 constant index.
+; Labels are spelled with the X64 and AVX prefixes because those are the only
+; prefixes the RUN lines enable; a default-prefix label would never be checked.
+; X64-LABEL: extractelement_index_4:
+; AVX-LABEL: extractelement_index_4:
+define i32 @extractelement_index_4(<8 x i32> %a) nounwind {
+ ; X64: movd
+ ; AVX: vextractf128 $1
+ ; AVX-NEXT: vmovd
+ %b = extractelement <8 x i32> %a, i256 4
+ ret i32 %b
+}
+
+; Extract from <32 x i8> with a non-constant i256 index passed as an argument.
+; Labels are spelled with the X64 and AVX prefixes because those are the only
+; prefixes the RUN lines enable; a default-prefix label would never be checked.
+; X64-LABEL: extractelement_index_5:
+; AVX-LABEL: extractelement_index_5:
+define i8 @extractelement_index_5(<32 x i8> %a, i256 %i) nounwind {
+ ; X64: movaps
+ ; AVX: vmovaps
+ %b = extractelement <32 x i8> %a, i256 %i
+ ret i8 %b
+}
+
+; The constant index is i256 -1, which is not a valid element position of
+; <32 x i8>; no element-extract instruction should be emitted.
+; The label and negative checks use the X64 and AVX prefixes because those are
+; the only prefixes the RUN lines enable; default-prefix directives would be
+; ignored and this test would verify nothing.
+; X64-LABEL: extractelement_index_6:
+; AVX-LABEL: extractelement_index_6:
+define i8 @extractelement_index_6(<32 x i8> %a) nounwind {
+ ; X64-NOT: pextr
+ ; AVX-NOT: pextr
+ %b = extractelement <32 x i8> %a, i256 -1
+ ret i8 %b
+}
\ No newline at end of file