1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; Truncate two <2 x i64> inputs to <2 x i32> each and concatenate them into a
; single <4 x i32>. Pre-SSE4.1 targets are expected to lower this as two
; even-lane pshufd shuffles merged with punpcklqdq; SSE4.1 and AVX instead
; merge with a 16-bit blend (pblendw / vpblendw).
6 define <4 x i32> @trunc2x2i64(<2 x i64> %a, <2 x i64> %b) {
7 ; SSE2-LABEL: trunc2x2i64:
8 ; SSE2: # BB#0: # %entry
9 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
10 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
11 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
14 ; SSSE3-LABEL: trunc2x2i64:
15 ; SSSE3: # BB#0: # %entry
16 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
17 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
18 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
21 ; SSE41-LABEL: trunc2x2i64:
22 ; SSE41: # BB#0: # %entry
23 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
24 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
25 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
28 ; AVX-LABEL: trunc2x2i64:
29 ; AVX: # BB#0: # %entry
30 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
31 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
32 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
37 %0 = trunc <2 x i64> %a to <2 x i32>
38 %1 = trunc <2 x i64> %b to <2 x i32>
39 %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Truncate a <2 x i64> to <2 x i32> and bitcast the result to an i64 returned
; in a GPR. A single even-lane pshufd/vpshufd followed by one vector->GPR
; move (movd/vmovq %xmm0, %rax) should be all that is emitted.
43 define i64 @trunc2i64(<2 x i64> %inval) {
44 ; SSE-LABEL: trunc2i64:
45 ; SSE: # BB#0: # %entry
46 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
47 ; SSE-NEXT: movd %xmm0, %rax
50 ; AVX-LABEL: trunc2i64:
51 ; AVX: # BB#0: # %entry
52 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
53 ; AVX-NEXT: vmovq %xmm0, %rax
58 %0 = trunc <2 x i64> %inval to <2 x i32>
59 %1 = bitcast <2 x i32> %0 to i64
; Truncate two <4 x i32> inputs to <4 x i16> each and concatenate into one
; <8 x i16>. SSE2 (no pshufb) needs the pshuflw/pshufhw/pshufd sequence per
; input; SSSE3/SSE4.1 are expected to share one pshufb mask constant across
; both inputs before the punpcklqdq merge, and AVX uses the v-prefixed forms.
63 define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
64 ; SSE2-LABEL: trunc2x4i32:
65 ; SSE2: # BB#0: # %entry
66 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
67 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
68 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
69 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
70 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
71 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
72 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
75 ; SSSE3-LABEL: trunc2x4i32:
76 ; SSSE3: # BB#0: # %entry
77 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
78 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
79 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
80 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
83 ; SSE41-LABEL: trunc2x4i32:
84 ; SSE41: # BB#0: # %entry
85 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
86 ; SSE41-NEXT: pshufb %xmm2, %xmm1
87 ; SSE41-NEXT: pshufb %xmm2, %xmm0
88 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
91 ; AVX-LABEL: trunc2x4i32:
92 ; AVX: # BB#0: # %entry
93 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
94 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
95 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
96 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
103 %0 = trunc <4 x i32> %a to <4 x i16>
104 %1 = trunc <4 x i32> %b to <4 x i16>
105 %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
109 ; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncate <4 x i32> to <4 x i16> and bitcast to i64 (the PR15524 pattern).
; SSE2 needs the three-shuffle sequence; SSSE3/SSE4.1/AVX should fold the
; whole truncate into a single pshufb before the vector->GPR move.
110 define i64 @trunc4i32(<4 x i32> %inval) {
111 ; SSE2-LABEL: trunc4i32:
112 ; SSE2: # BB#0: # %entry
113 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
114 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
115 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
116 ; SSE2-NEXT: movd %xmm0, %rax
119 ; SSSE3-LABEL: trunc4i32:
120 ; SSSE3: # BB#0: # %entry
121 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
122 ; SSSE3-NEXT: movd %xmm0, %rax
125 ; SSE41-LABEL: trunc4i32:
126 ; SSE41: # BB#0: # %entry
127 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
128 ; SSE41-NEXT: movd %xmm0, %rax
131 ; AVX-LABEL: trunc4i32:
132 ; AVX: # BB#0: # %entry
133 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
134 ; AVX-NEXT: vmovq %xmm0, %rax
141 %0 = trunc <4 x i32> %inval to <4 x i16>
142 %1 = bitcast <4 x i16> %0 to i64
; Truncate two <8 x i16> inputs to <8 x i8> each and concatenate into one
; <16 x i8>. SSE2 masks the high bytes with pand and packs with packuswb;
; SSSE3/SSE4.1 share one even-byte pshufb mask (high half undef) and merge
; with punpcklqdq; AVX uses the equivalent v-prefixed instructions.
146 define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
147 ; SSE2-LABEL: trunc2x8i16:
148 ; SSE2: # BB#0: # %entry
149 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
150 ; SSE2-NEXT: pand %xmm2, %xmm1
151 ; SSE2-NEXT: pand %xmm2, %xmm0
152 ; SSE2-NEXT: packuswb %xmm1, %xmm0
155 ; SSSE3-LABEL: trunc2x8i16:
156 ; SSSE3: # BB#0: # %entry
157 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
158 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
159 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
160 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
163 ; SSE41-LABEL: trunc2x8i16:
164 ; SSE41: # BB#0: # %entry
165 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
166 ; SSE41-NEXT: pshufb %xmm2, %xmm1
167 ; SSE41-NEXT: pshufb %xmm2, %xmm0
168 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
171 ; AVX-LABEL: trunc2x8i16:
172 ; AVX: # BB#0: # %entry
173 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
174 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
175 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
176 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
183 %0 = trunc <8 x i16> %a to <8 x i8>
184 %1 = trunc <8 x i16> %b to <8 x i8>
185 %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
189 ; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncate <8 x i16> to <8 x i8> and bitcast to i64 (the PR15524 pattern).
; SSE2 uses pand+packuswb; SSSE3/SSE4.1/AVX fold the truncate into a single
; even-byte pshufb before the vector->GPR move.
190 define i64 @trunc8i16(<8 x i16> %inval) {
191 ; SSE2-LABEL: trunc8i16:
192 ; SSE2: # BB#0: # %entry
193 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
194 ; SSE2-NEXT: packuswb %xmm0, %xmm0
195 ; SSE2-NEXT: movd %xmm0, %rax
198 ; SSSE3-LABEL: trunc8i16:
199 ; SSSE3: # BB#0: # %entry
200 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
201 ; SSSE3-NEXT: movd %xmm0, %rax
204 ; SSE41-LABEL: trunc8i16:
205 ; SSE41: # BB#0: # %entry
206 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
207 ; SSE41-NEXT: movd %xmm0, %rax
210 ; AVX-LABEL: trunc8i16:
211 ; AVX: # BB#0: # %entry
212 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
213 ; AVX-NEXT: vmovq %xmm0, %rax
220 %0 = trunc <8 x i16> %inval to <8 x i8>
221 %1 = bitcast <8 x i8> %0 to i64
; A truncate of a zeroinitializer <16 x i64> followed by a shuffle (including
; an undef lane) must constant-fold to an all-zeros vector: the only emitted
; instruction should be a register-zeroing xorps/vxorps, with no shuffles.
; Fix: the two -LABEL checks were missing the trailing ':' present on every
; other LABEL in this file; added for consistency and a stricter anchor.
225 define <16 x i8> @trunc16i64_const() {
226 ; SSE-LABEL: trunc16i64_const:
227 ; SSE: # BB#0: # %entry
228 ; SSE-NEXT: xorps %xmm0, %xmm0
231 ; AVX-LABEL: trunc16i64_const:
232 ; AVX: # BB#0: # %entry
233 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
237 %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
238 %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>