From: Simon Pilgrim
Date: Sat, 25 Jul 2015 11:17:35 +0000 (+0000)
Subject: [X86][SSE] Added additional vector sign/zero extension tests.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=af824151a154f3888795c839babb92a9dc7569a0;p=oota-llvm.git

[X86][SSE] Added additional vector sign/zero extension tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243212 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 8e79493ddd0..5938b56acb5 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -7,6 +7,337 @@
 ; Just one 32-bit run to make sure we do reasonable things there.
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=i686 -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
+define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psraw $8, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_16i8_to_8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_8i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %C = sext <8 x i8> %B to <8 x i16>
+  ret <8 x i16> %C
+}
+
+define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_16i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: psraw $8, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: psraw $8, %xmm1
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_16i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: psraw $8, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: psraw $8, %xmm1
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_16i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: sext_16i8_to_16i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sext_16i8_to_16i16:
+;
AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 +; AVX2-NEXT: retq +; +; X32-SSE41-LABEL: sext_16i8_to_16i16: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 +; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = sext <16 x i8> %A to <16 x i16> + ret <16 x i16> %B +} + +define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_16i8_to_4i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_16i8_to_4i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: psrad $24, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_16i8_to_4i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_16i8_to_4i32: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE41-LABEL: sext_16i8_to_4i32: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> + %C = sext <4 x i8> %B to <4 x i32> + ret <4 x i32> %C +} + +define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_16i8_to_8i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: psrad $24, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: psrad $24, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_16i8_to_8i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSSE3-NEXT: psrad $24, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: psrad $24, %xmm1 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_16i8_to_8i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: sext_16i8_to_8i32: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sext_16i8_to_8i32: +; AVX2: # BB#0: # %entry +; 
AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX2-NEXT: vpslld $24, %ymm0, %ymm0 +; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; X32-SSE41-LABEL: sext_16i8_to_8i32: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 +; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> + %C = sext <8 x i8> %B to <8 x i32> + ret <8 x i32> %C +} + +define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_16i8_to_2i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_16i8_to_2i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: psrad $24, %xmm0 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_16i8_to_2i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_16i8_to_2i64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE41-LABEL: sext_16i8_to_2i64: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> + %C = sext <2 x i8> %B to <2 x i64> + ret <2 x i64> %C +} + +define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_16i8_to_4i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: psrad $24, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: psrad $24, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_16i8_to_4i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: psrad $24, %xmm2 
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: psrld $16, %xmm0 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: psrad $31, %xmm0 +; SSSE3-NEXT: psrad $24, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_16i8_to_4i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 +; SSE41-NEXT: psrld $16, %xmm0 +; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: sext_16i8_to_4i64: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sext_16i8_to_4i64: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX2-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: retq +; +; X32-SSE41-LABEL: sext_16i8_to_4i64: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 +; X32-SSE41-NEXT: psrld $16, %xmm0 +; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 +; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> + %C = sext <4 x i8> %B to <4 x i64> + ret <4 x i64> %C +} + +define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_8i16_to_4i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_8i16_to_4i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: psrad $16, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_8i16_to_4i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_8i16_to_4i32: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE41-LABEL: sext_8i16_to_4i32: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> + %C = sext <4 x i16> %B to <4 x i32> + ret <4 x i32> %C +} + define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: sext_8i16_to_8i32: ; SSE2: # BB#0: # %entry @@ -58,7 +389,151 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp ; X32-SSE41-NEXT: retl entry: %B = sext <8 x i16> %A to <8 x i32> - ret <8 x i32>%B + ret <8 x i32> %B +} + +define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_8i16_to_2i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_8i16_to_2i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; 
SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: psrad $16, %xmm0 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_8i16_to_2i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_8i16_to_2i64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE41-LABEL: sext_8i16_to_2i64: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> + %C = sext <2 x i16> %B to <2 x i64> + ret <2 x i64> %C +} + +define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_8i16_to_4i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_8i16_to_4i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: psrad $16, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: psrad $31, %xmm0 +; SSSE3-NEXT: psrad $16, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_8i16_to_4i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: sext_8i16_to_4i64: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: sext_8i16_to_4i64: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vpslld $16, %xmm0, %xmm0 +; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: retq +; +; X32-SSE41-LABEL: sext_8i16_to_4i64: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 +; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 +; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> + %C = sext <4 x i16> %B to <4 x i64> + ret <4 x i64> %C +} + +define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_4i32_to_2i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movdqa %xmm0, %xmm1 
+; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_4i32_to_2i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: psrad $31, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_4i32_to_2i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_4i32_to_2i64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE41-LABEL: sext_4i32_to_2i64: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 +; X32-SSE41-NEXT: retl +entry: + %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> + %C = sext <2 x i32> %B to <2 x i64> + ret <2 x i64> %C } define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { @@ -146,7 +621,10 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { ; ; X32-SSE41-LABEL: sext_2i8_to_i32: ; X32-SSE41: # BB#0: # %entry -; X32-SSE41: pmovsxbw %xmm0, %xmm0 +; X32-SSE41-NEXT: pushl %eax +; X32-SSE41-NEXT: .Ltmp0: +; X32-SSE41-NEXT: .cfi_def_cfa_offset 8 +; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 ; X32-SSE41-NEXT: movd %xmm0, %eax ; X32-SSE41-NEXT: popl %edx ; X32-SSE41-NEXT: retl @@ -157,32 +635,32 @@ entry: ret i32 %Bc } -define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) { -; SSE2-LABEL: load_sext_test1: +define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { +; SSE2-LABEL: load_sext_4i16_to_4i32: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: psrad $16, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: load_sext_test1: +; SSSE3-LABEL: load_sext_4i16_to_4i32: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: psrad $16, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: load_sext_test1: +; SSE41-LABEL: load_sext_4i16_to_4i32: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: load_sext_test1: +; AVX-LABEL: load_sext_4i16_to_4i32: ; AVX: # BB#0: # %entry ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 ; AVX-NEXT: retq ; -; X32-SSE41-LABEL: load_sext_test1: +; X32-SSE41-LABEL: load_sext_4i16_to_4i32: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 @@ -193,8 +671,8 @@ entry: ret <4 x i32>%Y } -define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) { -; SSE2-LABEL: load_sext_test2: +define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { +; SSE2-LABEL: load_sext_4i8_to_4i32: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -202,7 +680,7 @@ define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) { ; SSE2-NEXT: psrad $24, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: load_sext_test2: +; SSSE3-LABEL: load_sext_4i8_to_4i32: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -210,17 +688,17 @@ define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) { ; SSSE3-NEXT: psrad $24, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: load_sext_test2: +; SSE41-LABEL: load_sext_4i8_to_4i32: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 ; SSE41-NEXT: retq ; 
-; AVX-LABEL: load_sext_test2: +; AVX-LABEL: load_sext_4i8_to_4i32: ; AVX: # BB#0: # %entry ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 ; AVX-NEXT: retq ; -; X32-SSE41-LABEL: load_sext_test2: +; X32-SSE41-LABEL: load_sext_4i8_to_4i32: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 @@ -231,8 +709,8 @@ entry: ret <4 x i32>%Y } -define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) { -; SSE2-LABEL: load_sext_test3: +define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) { +; SSE2-LABEL: load_sext_2i8_to_2i64: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movzwl (%rdi), %eax ; SSE2-NEXT: movd %eax, %xmm0 @@ -244,7 +722,7 @@ define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: load_sext_test3: +; SSSE3-LABEL: load_sext_2i8_to_2i64: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movzwl (%rdi), %eax ; SSSE3-NEXT: movd %eax, %xmm0 @@ -256,17 +734,17 @@ define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: load_sext_test3: +; SSE41-LABEL: load_sext_2i8_to_2i64: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: load_sext_test3: +; AVX-LABEL: load_sext_2i8_to_2i64: ; AVX: # BB#0: # %entry ; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 ; AVX-NEXT: retq ; -; X32-SSE41-LABEL: load_sext_test3: +; X32-SSE41-LABEL: load_sext_2i8_to_2i64: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 @@ -277,8 +755,8 @@ entry: ret <2 x i64>%Y } -define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) { -; SSE2-LABEL: load_sext_test4: +define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { +; SSE2-LABEL: load_sext_2i16_to_2i64: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -288,7 +766,7 @@ define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: load_sext_test4: +; SSSE3-LABEL: load_sext_2i16_to_2i64: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] @@ -298,17 +776,17 @@ define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: load_sext_test4: +; SSE41-LABEL: load_sext_2i16_to_2i64: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: load_sext_test4: +; AVX-LABEL: load_sext_2i16_to_2i64: ; AVX: # BB#0: # %entry ; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 ; AVX-NEXT: retq ; -; X32-SSE41-LABEL: load_sext_test4: +; X32-SSE41-LABEL: load_sext_2i16_to_2i64: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 @@ -319,8 +797,8 @@ entry: ret <2 x i64>%Y } -define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) { -; SSE2-LABEL: load_sext_test5: +define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) { +; SSE2-LABEL: load_sext_2i32_to_2i64: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: movdqa %xmm0, %xmm1 @@ -328,7 +806,7 @@ define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: load_sext_test5: +; SSSE3-LABEL: load_sext_2i32_to_2i64: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSSE3-NEXT: movdqa %xmm0, %xmm1 @@ -336,17 +814,17 @@ define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: load_sext_test5: +; SSE41-LABEL: load_sext_2i32_to_2i64: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: load_sext_test5: +; AVX-LABEL: load_sext_2i32_to_2i64: ; AVX: # BB#0: # %entry ; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 ; AVX-NEXT: retq ; -; X32-SSE41-LABEL: load_sext_test5: +; X32-SSE41-LABEL: load_sext_2i32_to_2i64: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 @@ -357,32 +835,32 @@ entry: ret <2 x i64>%Y } -define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) { -; SSE2-LABEL: load_sext_test6: +define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { +; SSE2-LABEL: load_sext_8i8_to_8i16: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: load_sext_test6: +; SSSE3-LABEL: load_sext_8i8_to_8i16: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSSE3-NEXT: psraw $8, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: load_sext_test6: +; SSE41-LABEL: load_sext_8i8_to_8i16: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 ; SSE41-NEXT: retq ; -; AVX-LABEL: load_sext_test6: +; AVX-LABEL: load_sext_8i8_to_8i16: ; AVX: # BB#0: # %entry ; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 ; AVX-NEXT: retq ; -; X32-SSE41-LABEL: load_sext_test6: +; X32-SSE41-LABEL: load_sext_8i8_to_8i16: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 @@ -460,8 +938,8 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { ret <4 x i64> %extmask } -define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) { -; SSE2-LABEL: sext_16i8_to_16i16: +define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { +; SSE2-LABEL: load_sext_16i8_to_16i16: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -471,7 +949,7 @@ define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) { ; SSE2-NEXT: psraw $8, %xmm1 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: sext_16i8_to_16i16: +; SSSE3-LABEL: load_sext_16i8_to_16i16: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] @@ -481,25 +959,25 @@ define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) { ; SSSE3-NEXT: psraw $8, %xmm1 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: sext_16i8_to_16i16: +; SSE41-LABEL: load_sext_16i8_to_16i16: ; SSE41: # BB#0: # %entry ; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 ; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 ; SSE41-NEXT: retq ; -; AVX1-LABEL: sext_16i8_to_16i16: +; AVX1-LABEL: load_sext_16i8_to_16i16: ; AVX1: # BB#0: # %entry ; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 ; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: sext_16i8_to_16i16: +; AVX2-LABEL: load_sext_16i8_to_16i16: ; AVX2: 
# BB#0: # %entry ; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 ; AVX2-NEXT: retq ; -; X32-SSE41-LABEL: sext_16i8_to_16i16: +; X32-SSE41-LABEL: load_sext_16i8_to_16i16: ; X32-SSE41: # BB#0: # %entry ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index b119f5eb89f..b17799fd8c8 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -4,6 +4,281 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 +define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_16i8_to_8i16: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_16i8_to_8i16: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_16i8_to_8i16: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_16i8_to_8i16: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT: retq +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> + %C = zext <8 x i8> %B to <8 x i16> + ret <8 x i16> %C +} + +; PR17654 +define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) { +; SSE2-LABEL: zext_16i8_to_16i16: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_16i8_to_16i16: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_16i8_to_16i16: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: zext_16i8_to_16i16: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: zext_16i8_to_16i16: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: retq +entry: + %B = zext <16 x i8> %A to <16 x i16> + ret <16 x i16> %B +} + +define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_16i8_to_4i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_16i8_to_4i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_16i8_to_4i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_16i8_to_4i32: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX-NEXT: retq +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> + %C = zext <4 x i8> %B to <4 x i32> + ret <4 x i32> %C +} + +define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_16i8_to_8i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_16i8_to_8i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_16i8_to_8i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: movdqa 
%xmm0, %xmm1 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero +; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: zext_16i8_to_8i32: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: zext_16i8_to_8i32: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> + %C = zext <8 x i8> %B to <8 x i32> + ret <8 x i32> %C +} + +define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_16i8_to_2i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1] +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_16i8_to_2i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_16i8_to_2i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_16i8_to_2i64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> + %C = zext <2 x i8> %B to <2 x i64> + ret <2 x i64> %C +} + +define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_16i8_to_4i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255] +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand %xmm3, %xmm1 +; 
SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_16i8_to_4i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa %xmm0, %xmm2 +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255] +; SSSE3-NEXT: pand %xmm1, %xmm2 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,1,1,2,2,3,3,3,3,5,5,2,2,3,3] +; SSSE3-NEXT: pand %xmm0, %xmm1 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_16i8_to_4i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255] +; SSE41-NEXT: pand %xmm1, %xmm2 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,1,1,2,2,3,3,3,3,5,5,2,2,3,3] +; SSE41-NEXT: pand %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: zext_16i8_to_4i64: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: zext_16i8_to_4i64: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +entry: + %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> + %C = zext <4 x i8> %B to <4 x i64> + ret <4 x i64> %C +} + +define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_8i16_to_4i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_8i16_to_4i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_8i16_to_4i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_8i16_to_4i32: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: retq +entry: + %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> + %C = zext <4 x i16> %B to <4 x i32> + ret <4 x i32> %C +} + define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: zext_8i16_to_8i32: ; SSE2: # BB#0: # %entry @@ -48,6 +323,124 @@ entry: ret <8 x i32>%B } +define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_8i16_to_2i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_8i16_to_2i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pshufd {{.*#+}} 
xmm0 = xmm0[0,1,0,3] +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_8i16_to_2i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; SSE41-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_8i16_to_2i64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> + %C = zext <2 x i16> %B to <2 x i64> + ret <2 x i64> %C +} + +define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_8i16_to_4i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,3] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,5,5,6,7] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535] +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_8i16_to_4i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] +; SSSE3-NEXT: pand %xmm2, %xmm1 +; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_8i16_to_4i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535] +; SSE41-NEXT: pand %xmm1, %xmm2 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7] +; SSE41-NEXT: pand %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: zext_8i16_to_4i64: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: zext_8i16_to_4i64: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +entry: + %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> + %C = zext <4 x i16> %B to <4 x i64> + ret <4 x i64> %C +} + +define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: zext_4i32_to_2i64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_4i32_to_2i64: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_4i32_to_2i64: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE41-NEXT: pand 
{{.*}}(%rip), %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_4i32_to_2i64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> + %C = zext <2 x i32> %B to <2 x i64> + ret <2 x i64> %C +} + define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: zext_4i32_to_4i64: ; SSE2: # BB#0: # %entry @@ -148,51 +541,6 @@ entry: ret <8 x i32> %t } -; PR17654 -define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) { -; SSE2-LABEL: zext_16i8_to_16i16: -; SSE2: # BB#0: # %entry -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: retq -; -; SSSE3-LABEL: zext_16i8_to_16i16: -; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: zext_16i8_to_16i16: -; SSE41: # BB#0: # %entry -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE41-NEXT: retq -; -; AVX1-LABEL: zext_16i8_to_16i16: -; AVX1: # BB#0: # %entry -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: zext_16i8_to_16i16: -; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: retq -entry: - %t = zext <16 x i8> %z to <16 x i16> - ret <16 x i16> %t -} - define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) { ; SSE2-LABEL: load_zext_16i8_to_16i16: ; SSE2: # BB#0: # %entry