From 52ebd433386d85eee0b06d4592961294563fb067 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Thu, 24 Dec 2015 08:12:22 +0000
Subject: [PATCH] AVX-512: Kreg set 0/1 optimization

The patterns that set a mask register to 0/1,
KXOR %kn, %kn, %kn / KXNOR %kn, %kn, %kn,
are replaced with
KXOR %k0, %k0, %kn / KXNOR %k0, %k0, %kn,
an optimization for AVX-512 targets.

KNL does not recognize dependency-breaking idioms for mask registers,
so kxnor %k1, %k1, %k2 has a RAW dependence on %k1. Using %k0 as the
undef input register is a performance heuristic based on the
assumption that %k0 is used less frequently than the other mask
registers, since it is not usable as a write mask.

Differential Revision: http://reviews.llvm.org/D15739

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256365 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp           | 34 +++++++++--
 .../X86/avx512-gather-scatter-intrin.ll   | 60 +++++++++----------
 test/CodeGen/X86/avx512-mask-op.ll        |  2 +-
 test/CodeGen/X86/masked_gather_scatter.ll | 58 +++++++++---------
 4 files changed, 88 insertions(+), 66 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index fb937fdcb08..664ba7f0d3e 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -5279,6 +5279,20 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
   return true;
 }
 
+/// Expand a single-def pseudo instruction to a two-addr
+/// instruction with two %k0 reads.
+/// This is used for mapping:
+///   %k4 = K_SET1
+/// to:
+///   %k4 = KXNORrr %k0, %k0
+static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
+                            const MCInstrDesc &Desc, unsigned Reg) {
+  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+  MIB->setDesc(Desc);
+  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+  return true;
+}
+
 static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
                           bool MinusOne) {
   MachineBasicBlock &MBB = *MIB->getParent();
@@ -5400,14 +5414,22 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   case X86::TEST8ri_NOREX:
     MI->setDesc(get(X86::TEST8ri));
     return true;
+
+  // KNL does not recognize dependency-breaking idioms for mask registers,
+  // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
+  // Using %k0 as the undef input register is a performance heuristic based
+  // on the assumption that %k0 is used less frequently than the other mask
+  // registers, since it is not usable as a write mask.
+  // FIXME: A more advanced approach would be to choose the best input mask
+  // register based on context.
   case X86::KSET0B:
-  case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
-  case X86::KSET0D: return Expand2AddrUndef(MIB, get(X86::KXORDrr));
-  case X86::KSET0Q: return Expand2AddrUndef(MIB, get(X86::KXORQrr));
+  case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+  case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+  case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
   case X86::KSET1B:
-  case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
-  case X86::KSET1D: return Expand2AddrUndef(MIB, get(X86::KXNORDrr));
-  case X86::KSET1Q: return Expand2AddrUndef(MIB, get(X86::KXNORQrr));
+  case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+  case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+  case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
   case TargetOpcode::LOAD_STACK_GUARD:
     expandLoadStackGuard(MIB, *this);
     return true;
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index c9a93ecf762..3bc67cceaab 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -240,8 +240,8 @@ define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %
 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
 ; CHECK-LABEL: gather_qps:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
 ; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
 ; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
@@ -257,7 +257,7 @@ declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 define void @prefetch(<8 x i64> %ind, i8* %base) {
 ; CHECK-LABEL: prefetch:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
 ; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
 ; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
@@ -279,7 +279,7 @@ define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -312,7 +312,7 @@ define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -330,7 +330,7 @@ define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
 ; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -348,7 +348,7 @@ define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1,
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -364,7 +364,7 @@ define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x
 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
 ; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
@@ -384,7 +384,7 @@ define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1,
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -420,7 +420,7 @@ define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -453,7 +453,7 @@ define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -486,7 +486,7 @@ define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1,
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
 ; CHECK-NEXT:    retq
@@ -502,7 +502,7 @@ define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x
 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
 ; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
@@ -522,7 +522,7 @@ define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1,
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vmovaps %zmm0, %zmm2
 ; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
 ; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
 ; CHECK-NEXT:    retq
@@ -556,7 +556,7 @@ define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -572,7 +572,7 @@ define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
@@ -587,7 +587,7 @@ define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
@@ -602,7 +602,7 @@ define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
@@ -617,7 +617,7 @@ define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
@@ -631,7 +631,7 @@ define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <
 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -647,7 +647,7 @@ define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
@@ -662,7 +662,7 @@ define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
@@ -676,7 +676,7 @@ define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -691,7 +691,7 @@ define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -707,7 +707,7 @@ define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
@@ -721,7 +721,7 @@ define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <
 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
-; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
 ; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
 ; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
@@ -737,7 +737,7 @@ define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
@@ -752,7 +752,7 @@ define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
@@ -767,7 +767,7 @@ define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
@@ -782,7 +782,7 @@ define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovb %esi, %k1
 ; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
-; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index d4ec23699a0..244d761058c 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -323,7 +323,7 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
 }
 
 ; SKX-LABEL: test16
-; SKX: kxnorw %k1, %k1, %k1
+; SKX: kxnorw %k0, %k0, %k1
 ; SKX: kshiftrw $15, %k1, %k1
 ; SKX: kshiftlq $5, %k1, %k1
 ; SKX: korq %k1, %k0, %k0
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 84f04c01efc..8578c76d5f1 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -20,7 +20,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define <16 x float> @test1(float* %base, <16 x i32> %ind) {
 ; KNL_64-LABEL: test1:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_64-NEXT:    retq
@@ -28,14 +28,14 @@ define <16 x float> @test1(float* %base, <16 x i32> %ind) {
 ; KNL_32-LABEL: test1:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test1:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -243,8 +243,8 @@ declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x
 define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ; KNL_64-LABEL: test6:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
-; KNL_64-NEXT:    kxnorw %k2, %k2, %k2
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
 ; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; KNL_64-NEXT:    vmovaps %zmm2, %zmm0
@@ -252,9 +252,9 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ;
 ; KNL_32-LABEL: test6:
 ; KNL_32:       # BB#0:
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpmovsxdq %ymm1, %zmm2
-; KNL_32-NEXT:    kxnorw %k2, %k2, %k2
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_32-NEXT:    vpgatherqd (,%zmm2), %ymm1 {%k2}
 ; KNL_32-NEXT:    vpscatterqd %ymm0, (,%zmm2) {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
@@ -262,8 +262,8 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ;
 ; SKX-LABEL: test6:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
-; SKX-NEXT:    kxnorw %k2, %k2, %k2
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k2
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
 ; SKX-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; SKX-NEXT:    vmovaps %zmm2, %zmm0
@@ -409,7 +409,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; KNL_64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
@@ -426,7 +426,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; KNL_32-NEXT:    vpbroadcastd .LCPI8_2, %ymm1
 ; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_32-NEXT:    retl
 ;
@@ -439,7 +439,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
@@ -471,7 +471,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; KNL_64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
@@ -488,7 +488,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; KNL_32-NEXT:    vpbroadcastd .LCPI9_2, %ymm1
 ; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpmovsxdq %ymm0, %zmm1
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; KNL_32-NEXT:    retl
 ;
@@ -501,7 +501,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
 ; SKX-NEXT:    retq
 entry:
@@ -518,7 +518,7 @@ define <16 x float> @test11(float* %base, i32 %ind) {
 ; KNL_64-LABEL: test11:
 ; KNL_64:       # BB#0:
 ; KNL_64-NEXT:    vpbroadcastd %esi, %zmm1
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
@@ -526,14 +526,14 @@ define <16 x float> @test11(float* %base, i32 %ind) {
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm1
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test11:
 ; SKX:       # BB#0:
 ; SKX-NEXT:    vpbroadcastd %esi, %zmm1
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
 ; SKX-NEXT:    retq
 
@@ -550,7 +550,7 @@ define <16 x float> @test12(float* %base, <16 x i32> %ind) {
 ; KNL_64-LABEL: test12:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_64-NEXT:    retq
@@ -558,14 +558,14 @@ define <16 x float> @test12(float* %base, <16 x i32> %ind) {
 ; KNL_32-LABEL: test12:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test12:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -1064,7 +1064,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ;
 ; SKX-LABEL: test24:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -1133,7 +1133,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
 ;
 ; SKX-LABEL: test26:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
@@ -1404,8 +1404,8 @@ declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1
 define <16 x float*> @test31(<16 x float**> %ptrs) {
 ; KNL_64-LABEL: test31:
 ; KNL_64:       # BB#0:
-; KNL_64-NEXT:    kxnorw %k1, %k1, %k1
-; KNL_64-NEXT:    kxnorw %k2, %k2, %k2
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_64-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
 ; KNL_64-NEXT:    kshiftrw $8, %k1, %k1
 ; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
@@ -1415,15 +1415,15 @@ define <16 x float*> @test31(<16 x float**> %ptrs) {
 ;
 ; KNL_32-LABEL: test31:
 ; KNL_32:       # BB#0:
-; KNL_32-NEXT:    kxnorw %k1, %k1, %k1
+; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: test31:
 ; SKX:       # BB#0:
-; SKX-NEXT:    kxnorw %k1, %k1, %k1
-; SKX-NEXT:    kxnorw %k2, %k2, %k2
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    kxnorw %k0, %k0, %k2
 ; SKX-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
 ; SKX-NEXT:    kshiftrw $8, %k1, %k1
 ; SKX-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
@@ -1433,7 +1433,7 @@ define <16 x float*> @test31(<16 x float**> %ptrs) {
 ;
 ; SKX_32-LABEL: test31:
 ; SKX_32:       # BB#0:
-; SKX_32-NEXT:    kxnorw %k1, %k1, %k1
+; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k1}
 ; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX_32-NEXT:    retl
-- 
2.34.1
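
Illustration: a minimal sketch of the hazard the new expansion avoids, using a
hypothetical instruction sequence (not taken from the patch above). Because KNL
does not treat the mask-register XOR idioms as dependency-breaking, the old
expansion's reads of the destination register serialize against the previous
write of that register:

    # Old expansion of KSET1W %k2 -> kxnorw %k2, %k2, %k2
    kmovw   %eax, %k2          # hypothetical prior write of %k2
    kxnorw  %k2, %k2, %k2      # reads %k2: RAW dependence on the kmovw above

    # New expansion of KSET1W %k2 -> kxnorw %k0, %k0, %k2
    kmovw   %eax, %k2
    kxnorw  %k0, %k0, %k2      # reads only %k0: independent of the kmovw

Both sequences leave %k2 all-ones, since KXNOR of two equal operands is
all-ones regardless of their value (and KXOR of two equal operands is
all-zeros, which is how the KSET0 pseudos expand); only the input operands, and
therefore the dependence chain, differ. %k0 is preferred as the input because
it cannot be encoded as a write mask, so writes to it are expected to be rare.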