; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
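
; Tests lowering of vector truncates for SSE2/SSSE3/SSE4.1/AVX/AVX2/AVX512BW.
; Several tests return void and store their result through an undef pointer,
; which is why those stores use an arbitrary (%rax) address in the checks.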
define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i64_8i32:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

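; Pre-SSE4.1 there is no packusdw, so the i64 -> i16 truncate below is lowered
; with scalar pextrw/movd extracts and word unpacks instead of pack nodes.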
define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pextrw $4, %xmm1, %eax
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT: pextrw $4, %xmm0, %ecx
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm3, %edx
; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm2, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pextrw $4, %xmm1, %eax
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm3, %edx
; SSSE3-NEXT: movd %edx, %xmm1
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm2, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i64_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: movq %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i64_8i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovqb %zmm0, (%rax)
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movq %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movq %xmm0, (%rax)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: movq %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, (%rax)
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc16i32_16i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovdb %zmm0, (%rax)
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

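; The trunc2x tests truncate two separate source vectors and concatenate the
; results with a single shufflevector.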
define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i64_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pextrw $4, %xmm1, %eax
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT: pextrw $4, %xmm0, %ecx
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm3, %edx
; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm2, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pextrw $4, %xmm1, %eax
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm3, %edx
; SSSE3-NEXT: movd %edx, %xmm1
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm2, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pextrw $4, %xmm0, %eax
; SSE41-NEXT: pinsrw $1, %eax, %xmm0
; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: pinsrw $2, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm1, %eax
; SSE41-NEXT: pinsrw $3, %eax, %xmm0
; SSE41-NEXT: movd %xmm2, %eax
; SSE41-NEXT: pinsrw $4, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm2, %eax
; SSE41-NEXT: pinsrw $5, %eax, %xmm0
; SSE41-NEXT: movd %xmm3, %eax
; SSE41-NEXT: pinsrw $6, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm3, %eax
; SSE41-NEXT: pinsrw $7, %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x2i64_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x2i64_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x2i64_4i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x2i64_4i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

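; Truncate <2 x i64> to <2 x i32>, then bitcast the result back to a scalar i64.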
define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

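; With a zeroinitializer source the truncate and shuffle should constant-fold
; to an all-zeros register.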
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE: # BB#0: # %entry
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX: # BB#0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: retq
entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}