-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s --check-prefix=AVX512VLCD
-
-target triple = "x86_64-unknown-unknown"
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VLCD --check-prefix=ALL --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=AVX512CD --check-prefix=ALL --check-prefix=AVX512
; Checks lowering of count-leading-zeros on <2 x i64>; the i1 0 flag means the
; result is defined even for a zero input (no "zero is undef" shortcut).
; With AVX-512 CD+VL this folds to a single vplzcntq on xmm; plain AVX-512 CD
; (no VL) must widen to zmm to use vplzcntq.
; NOTE(review): lines prefixed with '+'/'-' below are unified-diff residue, not
; literal IR/CHECK text — confirm this chunk is a patch before running FileCheck.
define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
-
+;
; AVX512VLCD-LABEL: testv2i64:
; AVX512VLCD: ## BB#0:
; AVX512VLCD-NEXT: vplzcntq %xmm0, %xmm0
; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv2i64:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
; ctlz with is_zero_undef = false (i1 0): fully-defined semantics at zero.
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
ret <2 x i64> %out
}
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
-
+;
; AVX512VLCD-LABEL: testv2i64u:
; AVX512VLCD: ## BB#0:
; AVX512VLCD-NEXT: vplzcntq %xmm0, %xmm0
; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv2i64u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 -1)
ret <2 x i64> %out
}
; AVX-NEXT: xorl $31, %eax
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
-
+;
; AVX512VLCD-LABEL: testv4i32:
; AVX512VLCD: ## BB#0:
; AVX512VLCD-NEXT: vplzcntd %xmm0, %xmm0
; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv4i32:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 0)
ret <4 x i32> %out
}
; AVX-NEXT: xorl $31, %eax
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
-
+;
; AVX512VLCD-LABEL: testv4i32u:
; AVX512VLCD: ## BB#0:
; AVX512VLCD-NEXT: vplzcntd %xmm0, %xmm0
; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv4i32u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 -1)
ret <4 x i32> %out
}
; AVX-NEXT: xorl $15, %ecx
; AVX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv8i16:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxwd %xmm0, %ymm0
+; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv8i16:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 0)
ret <8 x i16> %out
}
; AVX-NEXT: xorl $15, %eax
; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv8i16u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxwd %xmm0, %ymm0
+; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv8i16u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 -1)
ret <8 x i16> %out
}
; AVX-NEXT: xorl $7, %ecx
; AVX-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv16i8:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VLCD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv16i8:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512CD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 0)
ret <16 x i8> %out
}
; AVX-NEXT: xorl $7, %eax
; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv16i8u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VLCD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv16i8u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512CD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 -1)
ret <16 x i8> %out
}
; AVX-NEXT: movl $55, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv2i64:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: movl $55, %eax
+; AVX512VLCD-NEXT: vmovq %rax, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv2i64:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: movl $55, %eax
+; AVX512CD-NEXT: vmovq %rax, %xmm0
+; AVX512CD-NEXT: retq
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 0)
ret <2 x i64> %out
}
; AVX-NEXT: movl $55, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv2i64u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: movl $55, %eax
+; AVX512VLCD-NEXT: vmovq %rax, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv2i64u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: movl $55, %eax
+; AVX512CD-NEXT: vmovq %rax, %xmm0
+; AVX512CD-NEXT: retq
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 -1)
ret <2 x i64> %out
}
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv4i32:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv4i32:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512CD-NEXT: retq
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 0)
ret <4 x i32> %out
}
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv4i32u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv4i32u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512CD-NEXT: retq
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 -1)
ret <4 x i32> %out
}
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv8i16:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv8i16:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 0)
ret <8 x i16> %out
}
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv8i16u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv8i16u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 -1)
ret <8 x i16> %out
}
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv16i8:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv16i8:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 0)
ret <16 x i8> %out
}
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv16i8u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv16i8u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 -1)
ret <16 x i8> %out
}