1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; Module-level target; the RUN lines above pass the same triple to llc via -mtriple.
8 target triple = "x86_64-unknown-unknown"
; Constant-folding test: calls @llvm.ctlz.v2i64 on the constant vector
; <256, -1> with the i1 flag operand set to 0. Both the SSE and AVX check
; groups expect the value 55 (the leading-zero count of 256 as a 64-bit
; integer) to be materialized in %eax and then moved into %xmm0. The result
; for the second lane does not appear explicitly in the check lines shown here.
10 define <2 x i64> @foldv2i64() {
11 ; SSE-LABEL: foldv2i64:
13 ; SSE-NEXT: movl $55, %eax
14 ; SSE-NEXT: movd %rax, %xmm0
17 ; AVX-LABEL: foldv2i64:
19 ; AVX-NEXT: movl $55, %eax
20 ; AVX-NEXT: vmovq %rax, %xmm0
22 %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 0)
; Same constant input as @foldv2i64 (<256, -1>), but the i1 flag operand of
; @llvm.ctlz.v2i64 is -1 (true as an i1) instead of 0. Per the LLVM LangRef
; this operand controls whether a zero input yields an undefined result; no
; lane is zero here. The expected code is identical to the non-"u" variant:
; 55 loaded into %eax and moved into %xmm0.
26 define <2 x i64> @foldv2i64u() {
27 ; SSE-LABEL: foldv2i64u:
29 ; SSE-NEXT: movl $55, %eax
30 ; SSE-NEXT: movd %rax, %xmm0
33 ; AVX-LABEL: foldv2i64u:
35 ; AVX-NEXT: movl $55, %eax
36 ; AVX-NEXT: vmovq %rax, %xmm0
38 %out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 -1)
; Constant-folding test: calls @llvm.ctlz.v4i32 on <256, -1, 0, 255> with the
; i1 flag operand set to 0. The check lines expect the whole call to become a
; single constant load of [23,0,32,24], i.e. the per-lane leading-zero counts
; for 32-bit elements (the zero lane yields the full width, 32).
42 define <4 x i32> @foldv4i32() {
43 ; SSE-LABEL: foldv4i32:
45 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
48 ; AVX-LABEL: foldv4i32:
50 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
52 %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 0)
; Same constant input as @foldv4i32 (<256, -1, 0, 255>), but with the i1 flag
; operand of @llvm.ctlz.v4i32 set to -1 (true as an i1). The check lines still
; expect the constant [23,0,32,24], so the zero lane is expected to fold to 32
; exactly as in the non-"u" variant.
56 define <4 x i32> @foldv4i32u() {
57 ; SSE-LABEL: foldv4i32u:
59 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
62 ; AVX-LABEL: foldv4i32u:
64 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
66 %out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 -1)
; Constant-folding test: calls @llvm.ctlz.v8i16 on an eight-lane constant with
; the i1 flag operand set to 0. The check lines expect a single constant load
; of [7,0,16,8,16,13,11,9].
; Note: the literal -65536 in lane 4 does not fit in 16 bits; the expected
; result for that lane is 16, the same as for the explicit 0 in lane 2.
70 define <8 x i16> @foldv8i16() {
71 ; SSE-LABEL: foldv8i16:
73 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
76 ; AVX-LABEL: foldv8i16:
78 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
80 %out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 0)
; Same constant input as @foldv8i16, but with the i1 flag operand of
; @llvm.ctlz.v8i16 set to -1 (true as an i1). The expected constant is
; unchanged: [7,0,16,8,16,13,11,9], including 16 for the lanes whose expected
; result matches a zero input (lanes 2 and 4).
84 define <8 x i16> @foldv8i16u() {
85 ; SSE-LABEL: foldv8i16u:
87 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
90 ; AVX-LABEL: foldv8i16u:
92 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
94 %out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 -1)
; Constant-folding test: calls @llvm.ctlz.v16i8 on a sixteen-lane constant
; with the i1 flag operand set to 0. The check lines expect a single constant
; load of [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2].
; Note: several literals (256, 255, -65536, 254) are outside the signed 8-bit
; range. The expected results treat 256 and -65536 like 0 (result 8) and
; 255 and 254 like values with the top bit set (result 0).
98 define <16 x i8> @foldv16i8() {
99 ; SSE-LABEL: foldv16i8:
101 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
104 ; AVX-LABEL: foldv16i8:
106 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
108 %out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 0)
; Same constant input as @foldv16i8, but with the i1 flag operand of
; @llvm.ctlz.v16i8 set to -1 (true as an i1). The expected constant is
; unchanged: [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2], so lanes whose expected result
; matches a zero input are still expected to fold to 8.
112 define <16 x i8> @foldv16i8u() {
113 ; SSE-LABEL: foldv16i8u:
115 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
118 ; AVX-LABEL: foldv16i8u:
120 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
122 %out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 -1)
; Declarations of the count-leading-zeros intrinsic for each 128-bit vector
; type used by the fold tests in this file. Each takes the vector operand and
; an i1 flag and returns a vector of the same type.
126 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
127 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
128 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
129 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)