1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-unknown-unknown"
; insert_v2f64_z1 - replace lane 0 of a <2 x double> with the constant 0.0
; and leave lane 1 untouched ("z1" = zero, then original element 1).
; The assertions expect a zeroed scratch register whose low element is
; merged into %xmm0 via (v)movsd, for both the SSE and the AVX prefix groups.
12 define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
13 ; SSE-LABEL: insert_v2f64_z1:
15 ; SSE-NEXT: xorpd %xmm1, %xmm1
16 ; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
19 ; AVX-LABEL: insert_v2f64_z1:
21 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
22 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; IR under test - lane 0 <- 0.0.
24 %1 = insertelement <2 x double> %a, double 0.0, i32 0
; insert_v4f64_0zz3 - zero lanes 1 and 2 of a <4 x double>, keeping lanes 0
; and 3 ("0zz3").
; With the SSE prefixes the 256-bit value is handled as two registers
; (%xmm0 / %xmm1), one fix-up each. With the AVX prefixes the low half is
; blended into %ymm0 and the high half is extracted, patched and re-inserted.
28 define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
29 ; SSE-LABEL: insert_v4f64_0zz3:
31 ; SSE-NEXT: xorpd %xmm2, %xmm2
32 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
33 ; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
36 ; AVX-LABEL: insert_v4f64_0zz3:
38 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
39 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
40 ; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
41 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
42 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
43 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; IR under test - lane 1 <- 0.0, then lane 2 <- 0.0 (chained on %1).
45 %1 = insertelement <4 x double> %a, double 0.0, i32 1
46 %2 = insertelement <4 x double> %1, double 0.0, i32 2
; insert_v2i64_z1 - replace lane 0 of a <2 x i64> with the constant 0 and
; keep lane 1 ("z1").
; Expected code differs by feature level. SSE2, SSE3 and SSSE3 use the same
; xorpd + movsd sequence as the floating-point variant, while SSE4.1 and AVX
; zero %eax and insert %rax with (v)pinsrq $0.
50 define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
51 ; SSE2-LABEL: insert_v2i64_z1:
53 ; SSE2-NEXT: xorpd %xmm1, %xmm1
54 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
57 ; SSE3-LABEL: insert_v2i64_z1:
59 ; SSE3-NEXT: xorpd %xmm1, %xmm1
60 ; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
63 ; SSSE3-LABEL: insert_v2i64_z1:
65 ; SSSE3-NEXT: xorpd %xmm1, %xmm1
66 ; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
69 ; SSE41-LABEL: insert_v2i64_z1:
71 ; SSE41-NEXT: xorl %eax, %eax
72 ; SSE41-NEXT: pinsrq $0, %rax, %xmm0
75 ; AVX-LABEL: insert_v2i64_z1:
77 ; AVX-NEXT: xorl %eax, %eax
78 ; AVX-NEXT: vpinsrq $0, %rax, %xmm0, %xmm0
; IR under test - lane 0 <- i64 0.
80 %1 = insertelement <2 x i64> %a, i64 0, i32 0
; insert_v4i64_01z3 - replace lane 2 of a <4 x i64> with the constant 0,
; keeping lanes 0, 1 and 3 ("01z3").
; Under the SSE-family prefixes only %xmm1 is modified, at its element 0.
; Under AVX1/AVX2 the upper 128 bits of %ymm0 are extracted, patched with
; vpinsrq $0 and re-inserted. The two AVX variants differ only in using the
; f128 versus i128 extract/insert forms.
84 define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
85 ; SSE2-LABEL: insert_v4i64_01z3:
87 ; SSE2-NEXT: xorpd %xmm2, %xmm2
88 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
91 ; SSE3-LABEL: insert_v4i64_01z3:
93 ; SSE3-NEXT: xorpd %xmm2, %xmm2
94 ; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
97 ; SSSE3-LABEL: insert_v4i64_01z3:
99 ; SSSE3-NEXT: xorpd %xmm2, %xmm2
100 ; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
103 ; SSE41-LABEL: insert_v4i64_01z3:
105 ; SSE41-NEXT: xorl %eax, %eax
106 ; SSE41-NEXT: pinsrq $0, %rax, %xmm1
109 ; AVX1-LABEL: insert_v4i64_01z3:
111 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
112 ; AVX1-NEXT: xorl %eax, %eax
113 ; AVX1-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
114 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
117 ; AVX2-LABEL: insert_v4i64_01z3:
119 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
120 ; AVX2-NEXT: xorl %eax, %eax
121 ; AVX2-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
122 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; IR under test - lane 2 <- i64 0.
124 %1 = insertelement <4 x i64> %a, i64 0, i32 2
; insert_v4f32_01z3 - replace lane 2 of a <4 x float> with the constant 0.0,
; keeping lanes 0, 1 and 3 ("01z3").
; SSE2, SSE3 and SSSE3 are expected to need two shufps through a zeroed
; scratch register. SSE4.1 and AVX are expected to use a single (v)insertps.
128 define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
129 ; SSE2-LABEL: insert_v4f32_01z3:
131 ; SSE2-NEXT: xorps %xmm1, %xmm1
132 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
133 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
136 ; SSE3-LABEL: insert_v4f32_01z3:
138 ; SSE3-NEXT: xorps %xmm1, %xmm1
139 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
140 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
143 ; SSSE3-LABEL: insert_v4f32_01z3:
145 ; SSSE3-NEXT: xorps %xmm1, %xmm1
146 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
147 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
150 ; SSE41-LABEL: insert_v4f32_01z3:
152 ; SSE41-NEXT: xorps %xmm1, %xmm1
153 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
156 ; AVX-LABEL: insert_v4f32_01z3:
158 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
159 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; IR under test - lane 2 <- 0.0.
161 %1 = insertelement <4 x float> %a, float 0.0, i32 2
; insert_v8f32_z12345z7 - zero lanes 0 and 6 of an <8 x float>, keeping the
; other six lanes ("z12345z7").
; Under the SSE-family prefixes lane 0 is fixed up in %xmm0 and lane 6 in
; %xmm1 (as its element 2). SSE2, SSE3 and SSSE3 expect movss plus two
; shufps, and SSE4.1 expects blendps plus insertps. Under AVX, lane 0 is
; blended into %ymm0 and the upper half is extracted, patched with vinsertps
; and re-inserted.
165 define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
166 ; SSE2-LABEL: insert_v8f32_z12345z7:
168 ; SSE2-NEXT: xorps %xmm2, %xmm2
169 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
170 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
171 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
174 ; SSE3-LABEL: insert_v8f32_z12345z7:
176 ; SSE3-NEXT: xorps %xmm2, %xmm2
177 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
178 ; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
179 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
182 ; SSSE3-LABEL: insert_v8f32_z12345z7:
184 ; SSSE3-NEXT: xorps %xmm2, %xmm2
185 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
186 ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
187 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
190 ; SSE41-LABEL: insert_v8f32_z12345z7:
192 ; SSE41-NEXT: xorps %xmm2, %xmm2
193 ; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
194 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
197 ; AVX-LABEL: insert_v8f32_z12345z7:
199 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
200 ; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
201 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
202 ; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
203 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; IR under test - lane 0 <- 0.0, then lane 6 <- 0.0 (chained on %1).
205 %1 = insertelement <8 x float> %a, float 0.0, i32 0
206 %2 = insertelement <8 x float> %1, float 0.0, i32 6
; insert_v4i32_01z3 - replace lane 2 of a <4 x i32> with the constant 0,
; keeping lanes 0, 1 and 3 ("01z3").
; Every variant first zeroes %eax. SSE2, SSE3 and SSSE3 then move it into a
; vector register with movd and merge it using two shufps. SSE4.1 and AVX
; insert it directly with (v)pinsrd $2.
210 define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
211 ; SSE2-LABEL: insert_v4i32_01z3:
213 ; SSE2-NEXT: xorl %eax, %eax
214 ; SSE2-NEXT: movd %eax, %xmm1
215 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
216 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
219 ; SSE3-LABEL: insert_v4i32_01z3:
221 ; SSE3-NEXT: xorl %eax, %eax
222 ; SSE3-NEXT: movd %eax, %xmm1
223 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
224 ; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
227 ; SSSE3-LABEL: insert_v4i32_01z3:
229 ; SSSE3-NEXT: xorl %eax, %eax
230 ; SSSE3-NEXT: movd %eax, %xmm1
231 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
232 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
235 ; SSE41-LABEL: insert_v4i32_01z3:
237 ; SSE41-NEXT: xorl %eax, %eax
238 ; SSE41-NEXT: pinsrd $2, %eax, %xmm0
241 ; AVX-LABEL: insert_v4i32_01z3:
243 ; AVX-NEXT: xorl %eax, %eax
244 ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; IR under test - lane 2 <- i32 0.
246 %1 = insertelement <4 x i32> %a, i32 0, i32 2
; insert_v8i32_z12345z7 - zero lanes 0 and 6 of an <8 x i32>, keeping the
; other six lanes ("z12345z7").
; Under the SSE-family prefixes lane 0 lives in %xmm0 and lane 6 is element 2
; of %xmm1. SSE2, SSE3 and SSSE3 expect movss for lane 0 and a movd +
; two-shufps merge for lane 6. SSE4.1 expects one pinsrd per register.
; AVX1 and AVX2 differ in how lane 0 is merged into %ymm0 (vpinsrd + vblendps
; versus vmovd + vpblendd). Both then extract, patch and re-insert the upper
; half.
250 define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
251 ; SSE2-LABEL: insert_v8i32_z12345z7:
253 ; SSE2-NEXT: xorps %xmm2, %xmm2
254 ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
255 ; SSE2-NEXT: xorl %eax, %eax
256 ; SSE2-NEXT: movd %eax, %xmm2
257 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
258 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
261 ; SSE3-LABEL: insert_v8i32_z12345z7:
263 ; SSE3-NEXT: xorps %xmm2, %xmm2
264 ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
265 ; SSE3-NEXT: xorl %eax, %eax
266 ; SSE3-NEXT: movd %eax, %xmm2
267 ; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
268 ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
271 ; SSSE3-LABEL: insert_v8i32_z12345z7:
273 ; SSSE3-NEXT: xorps %xmm2, %xmm2
274 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
275 ; SSSE3-NEXT: xorl %eax, %eax
276 ; SSSE3-NEXT: movd %eax, %xmm2
277 ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
278 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
281 ; SSE41-LABEL: insert_v8i32_z12345z7:
283 ; SSE41-NEXT: xorl %eax, %eax
284 ; SSE41-NEXT: pinsrd $0, %eax, %xmm0
285 ; SSE41-NEXT: pinsrd $2, %eax, %xmm1
288 ; AVX1-LABEL: insert_v8i32_z12345z7:
290 ; AVX1-NEXT: xorl %eax, %eax
291 ; AVX1-NEXT: vpinsrd $0, %eax, %xmm0, %xmm1
292 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
293 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
294 ; AVX1-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
295 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
298 ; AVX2-LABEL: insert_v8i32_z12345z7:
300 ; AVX2-NEXT: xorl %eax, %eax
301 ; AVX2-NEXT: vmovd %eax, %xmm1
302 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
303 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
304 ; AVX2-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
305 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; IR under test - lane 0 <- i32 0, then lane 6 <- i32 0 (chained on %1).
307 %1 = insertelement <8 x i32> %a, i32 0, i32 0
308 %2 = insertelement <8 x i32> %1, i32 0, i32 6
; insert_v8i16_z12345z7 - zero lanes 0 and 6 of an <8 x i16>, keeping the
; other six lanes ("z12345z7").
; Both prefix groups expect the same shape - zero %eax once, then one
; (v)pinsrw per lane.
312 define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
313 ; SSE-LABEL: insert_v8i16_z12345z7:
315 ; SSE-NEXT: xorl %eax, %eax
316 ; SSE-NEXT: pinsrw $0, %eax, %xmm0
317 ; SSE-NEXT: pinsrw $6, %eax, %xmm0
320 ; AVX-LABEL: insert_v8i16_z12345z7:
322 ; AVX-NEXT: xorl %eax, %eax
323 ; AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
324 ; AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; IR under test - lane 0 <- i16 0, then lane 6 <- i16 0 (chained on %1).
326 %1 = insertelement <8 x i16> %a, i16 0, i32 0
327 %2 = insertelement <8 x i16> %1, i16 0, i32 6
; insert_v16i16_z12345z789ABZDEz - zero lanes 0, 6 and 15 of a <16 x i16>.
; NOTE(review): the capital 'Z' at position 12 of the function name does not
; correspond to an insertion; the IR below only touches indices 0, 6 and 15,
; so the 'Z' looks like a typo for 'C'. Renaming would also require
; regenerating the LABEL assertions, so it is only flagged here.
; Under the SSE prefix lanes 0 and 6 are patched in %xmm0 and lane 15 is
; element 7 of %xmm1. Under AVX1/AVX2 each low-half vpinsrw result is blended
; back into %ymm0 and the upper half is extracted, patched and re-inserted.
331 define <16 x i16> @insert_v16i16_z12345z789ABZDEz(<16 x i16> %a) {
332 ; SSE-LABEL: insert_v16i16_z12345z789ABZDEz:
334 ; SSE-NEXT: xorl %eax, %eax
335 ; SSE-NEXT: pinsrw $0, %eax, %xmm0
336 ; SSE-NEXT: pinsrw $6, %eax, %xmm0
337 ; SSE-NEXT: pinsrw $7, %eax, %xmm1
340 ; AVX1-LABEL: insert_v16i16_z12345z789ABZDEz:
342 ; AVX1-NEXT: xorl %eax, %eax
343 ; AVX1-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
344 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
345 ; AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm1
346 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
347 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
348 ; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
349 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
352 ; AVX2-LABEL: insert_v16i16_z12345z789ABZDEz:
354 ; AVX2-NEXT: xorl %eax, %eax
355 ; AVX2-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1
356 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
357 ; AVX2-NEXT: vpinsrw $6, %eax, %xmm0, %xmm1
358 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
359 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
360 ; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
361 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; IR under test - lanes 0, 6 and 15 <- i16 0, each insert chained on the
; previous result.
363 %1 = insertelement <16 x i16> %a, i16 0, i32 0
364 %2 = insertelement <16 x i16> %1, i16 0, i32 6
365 %3 = insertelement <16 x i16> %2, i16 0, i32 15
; insert_v16i8_z123456789ABZDEz - zero lanes 0 and 15 of a <16 x i8>.
; NOTE(review): the capital 'Z' at position 12 of the function name does not
; correspond to an insertion; the IR below only touches indices 0 and 15, so
; the 'Z' looks like a typo for 'C'. Renaming would also require regenerating
; the LABEL assertions, so it is only flagged here.
; Expected lowering by feature level -
;   SSE2 / SSE3   mask constants with pand / pandn / por, plus pslldq to move
;                 the zero byte to position 15
;   SSSE3         pshufb-based clears and merges
;   SSE4.1 / AVX  zero %eax, then one (v)pinsrb per lane
369 define <16 x i8> @insert_v16i8_z123456789ABZDEz(<16 x i8> %a) {
370 ; SSE2-LABEL: insert_v16i8_z123456789ABZDEz:
372 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
373 ; SSE2-NEXT: pand %xmm1, %xmm0
374 ; SSE2-NEXT: xorl %eax, %eax
375 ; SSE2-NEXT: movd %eax, %xmm2
376 ; SSE2-NEXT: pandn %xmm2, %xmm1
377 ; SSE2-NEXT: por %xmm1, %xmm0
378 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
379 ; SSE2-NEXT: pand %xmm1, %xmm0
380 ; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
381 ; SSE2-NEXT: pandn %xmm2, %xmm1
382 ; SSE2-NEXT: por %xmm1, %xmm0
385 ; SSE3-LABEL: insert_v16i8_z123456789ABZDEz:
387 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
388 ; SSE3-NEXT: pand %xmm1, %xmm0
389 ; SSE3-NEXT: xorl %eax, %eax
390 ; SSE3-NEXT: movd %eax, %xmm2
391 ; SSE3-NEXT: pandn %xmm2, %xmm1
392 ; SSE3-NEXT: por %xmm1, %xmm0
393 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
394 ; SSE3-NEXT: pand %xmm1, %xmm0
395 ; SSE3-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
396 ; SSE3-NEXT: pandn %xmm2, %xmm1
397 ; SSE3-NEXT: por %xmm1, %xmm0
400 ; SSSE3-LABEL: insert_v16i8_z123456789ABZDEz:
402 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
403 ; SSSE3-NEXT: xorl %eax, %eax
404 ; SSSE3-NEXT: movd %eax, %xmm1
405 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
406 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
407 ; SSSE3-NEXT: por %xmm2, %xmm0
408 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
409 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
410 ; SSSE3-NEXT: por %xmm1, %xmm0
413 ; SSE41-LABEL: insert_v16i8_z123456789ABZDEz:
415 ; SSE41-NEXT: xorl %eax, %eax
416 ; SSE41-NEXT: pinsrb $0, %eax, %xmm0
417 ; SSE41-NEXT: pinsrb $15, %eax, %xmm0
420 ; AVX-LABEL: insert_v16i8_z123456789ABZDEz:
422 ; AVX-NEXT: xorl %eax, %eax
423 ; AVX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
424 ; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; IR under test - lane 0 <- i8 0, then lane 15 <- i8 0 (chained on %1).
426 %1 = insertelement <16 x i8> %a, i8 0, i32 0
427 %2 = insertelement <16 x i8> %1, i8 0, i32 15
; insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz - zero lanes 0, 15, 30 and 31
; of a <32 x i8>; each 'z' in the name marks a zeroed position.
; Under the SSE-family prefixes lanes 0 and 15 are patched in %xmm0 and lanes
; 30 and 31 are elements 14 and 15 of %xmm1.
; Expected lowering by feature level -
;   SSE2 / SSE3   mask constants with pand / pandn / por, plus pslldq shifts
;   SSSE3         pshufb-based clears and merges (one shuffle mask is loaded
;                 into %xmm3 and applied to both halves)
;   SSE4.1        zero %eax, then four pinsrb
;   AVX1 / AVX2   vpinsrb on the low half blended back into %ymm0, then the
;                 upper half is extracted once and re-inserted after each of
;                 its two vpinsrb
431 define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
432 ; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
434 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
435 ; SSE2-NEXT: pand %xmm2, %xmm0
436 ; SSE2-NEXT: xorl %eax, %eax
437 ; SSE2-NEXT: movd %eax, %xmm3
438 ; SSE2-NEXT: pandn %xmm3, %xmm2
439 ; SSE2-NEXT: por %xmm2, %xmm0
440 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
441 ; SSE2-NEXT: pand %xmm2, %xmm0
442 ; SSE2-NEXT: movdqa %xmm3, %xmm4
443 ; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
444 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
445 ; SSE2-NEXT: pand %xmm5, %xmm1
446 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
447 ; SSE2-NEXT: pandn %xmm3, %xmm5
448 ; SSE2-NEXT: por %xmm5, %xmm1
449 ; SSE2-NEXT: pand %xmm2, %xmm1
450 ; SSE2-NEXT: pandn %xmm4, %xmm2
451 ; SSE2-NEXT: por %xmm2, %xmm0
452 ; SSE2-NEXT: por %xmm2, %xmm1
455 ; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
457 ; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
458 ; SSE3-NEXT: pand %xmm2, %xmm0
459 ; SSE3-NEXT: xorl %eax, %eax
460 ; SSE3-NEXT: movd %eax, %xmm3
461 ; SSE3-NEXT: pandn %xmm3, %xmm2
462 ; SSE3-NEXT: por %xmm2, %xmm0
463 ; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
464 ; SSE3-NEXT: pand %xmm2, %xmm0
465 ; SSE3-NEXT: movdqa %xmm3, %xmm4
466 ; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
467 ; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
468 ; SSE3-NEXT: pand %xmm5, %xmm1
469 ; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
470 ; SSE3-NEXT: pandn %xmm3, %xmm5
471 ; SSE3-NEXT: por %xmm5, %xmm1
472 ; SSE3-NEXT: pand %xmm2, %xmm1
473 ; SSE3-NEXT: pandn %xmm4, %xmm2
474 ; SSE3-NEXT: por %xmm2, %xmm0
475 ; SSE3-NEXT: por %xmm2, %xmm1
478 ; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
480 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
481 ; SSSE3-NEXT: xorl %eax, %eax
482 ; SSSE3-NEXT: movd %eax, %xmm2
483 ; SSSE3-NEXT: movdqa %xmm2, %xmm3
484 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
485 ; SSSE3-NEXT: por %xmm3, %xmm0
486 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
487 ; SSSE3-NEXT: pshufb %xmm3, %xmm0
488 ; SSSE3-NEXT: movdqa %xmm2, %xmm4
489 ; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
490 ; SSSE3-NEXT: por %xmm4, %xmm0
491 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
492 ; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
493 ; SSSE3-NEXT: por %xmm2, %xmm1
494 ; SSSE3-NEXT: pshufb %xmm3, %xmm1
495 ; SSSE3-NEXT: por %xmm4, %xmm1
498 ; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
500 ; SSE41-NEXT: xorl %eax, %eax
501 ; SSE41-NEXT: pinsrb $0, %eax, %xmm0
502 ; SSE41-NEXT: pinsrb $15, %eax, %xmm0
503 ; SSE41-NEXT: pinsrb $14, %eax, %xmm1
504 ; SSE41-NEXT: pinsrb $15, %eax, %xmm1
507 ; AVX1-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
509 ; AVX1-NEXT: xorl %eax, %eax
510 ; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
511 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
512 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
513 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
514 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
515 ; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
516 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
517 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
518 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
521 ; AVX2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
523 ; AVX2-NEXT: xorl %eax, %eax
524 ; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
525 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
526 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
527 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
528 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
529 ; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
530 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
531 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
532 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; IR under test - lanes 0, 15, 30 and 31 <- i8 0, each insert chained on the
; previous result.
534 %1 = insertelement <32 x i8> %a, i8 0, i32 0
535 %2 = insertelement <32 x i8> %1, i8 0, i32 15
536 %3 = insertelement <32 x i8> %2, i8 0, i32 30
537 %4 = insertelement <32 x i8> %3, i8 0, i32 31