From 51fc7f5afa5bf0d194c4928bcc948aa5c0a27c0d Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Wed, 25 Feb 2015 15:14:02 +0000
Subject: [PATCH] [X86][MMX] Reapply: Add MMX instructions to foldable tables

Reapply r230248.

Teach the peephole optimizer to work with MMX instructions by adding
entries into the foldable tables. This covers folding opportunities not
handled during isel.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230499 91177308-0d34-0410-b5e6-96231b3b80d8
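
A minimal sketch of the kind of fold these table entries enable, mirroring
the tt0 test added below (the function name is illustrative, the RUN line is
omitted, and an x86-64 target with MMX is assumed). Judging by the
surrounding entries, the first hunk extends the one-source-operand fold
table and the second hunk the two-source-operand table.

    ; Before: the load stays a separate instruction:
    ;   movq  (ptr), %mmN        ; ptr = whichever register holds %q
    ;   paddb %mmN, %mm0
    ; After: the peephole optimizer folds the load into the add:
    ;   paddb (ptr), %mm0
    define i64 @fold_sketch(x86_mmx %t, x86_mmx* %q) {
    entry:
      %v = load x86_mmx* %q
      %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
      %s = bitcast x86_mmx %u to i64
      call void @llvm.x86.mmx.emms()
      ret i64 %s
    }
    declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
    declare void @llvm.x86.mmx.emms()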
---
 lib/Target/X86/X86InstrInfo.cpp     |  84 ++++++++++++++++
 test/CodeGen/X86/mmx-fold-load.ll   | 145 ++++++++++++++++++++++++++++
 test/CodeGen/X86/vec_extract-mmx.ll |   3 +-
 3 files changed, 230 insertions(+), 2 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a543b4a9733..f5b9680d0bb 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -547,6 +547,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::UCOMISDrr,         X86::UCOMISDrm,         0 },
     { X86::UCOMISSrr,         X86::UCOMISSrm,         0 },
 
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPD2PIirr,   X86::MMX_CVTPD2PIirm,   0 },
+    { X86::MMX_CVTPI2PDirr,   X86::MMX_CVTPI2PDirm,   0 },
+    { X86::MMX_CVTPS2PIirr,   X86::MMX_CVTPS2PIirm,   0 },
+    { X86::MMX_CVTTPD2PIirr,  X86::MMX_CVTTPD2PIirm,  0 },
+    { X86::MMX_CVTTPS2PIirr,  X86::MMX_CVTTPS2PIirm,  0 },
+    { X86::MMX_MOVD64to64rr,  X86::MMX_MOVQ64rm,      0 },
+    { X86::MMX_PABSBrr64,     X86::MMX_PABSBrm64,     0 },
+    { X86::MMX_PABSDrr64,     X86::MMX_PABSDrm64,     0 },
+    { X86::MMX_PABSWrr64,     X86::MMX_PABSWrm64,     0 },
+    { X86::MMX_PSHUFWri,      X86::MMX_PSHUFWmi,      0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::Int_VCOMISDrr,     X86::Int_VCOMISDrm,     0 },
     { X86::Int_VCOMISSrr,     X86::Int_VCOMISSrm,     0 },
@@ -1117,6 +1129,78 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::XORPDrr,           X86::XORPDrm,           TB_ALIGN_16 },
     { X86::XORPSrr,           X86::XORPSrm,           TB_ALIGN_16 },
 
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPI2PSirr,   X86::MMX_CVTPI2PSirm,   0 },
+    { X86::MMX_PACKSSDWirr,   X86::MMX_PACKSSDWirm,   0 },
+    { X86::MMX_PACKSSWBirr,   X86::MMX_PACKSSWBirm,   0 },
+    { X86::MMX_PACKUSWBirr,   X86::MMX_PACKUSWBirm,   0 },
+    { X86::MMX_PADDBirr,      X86::MMX_PADDBirm,      0 },
+    { X86::MMX_PADDDirr,      X86::MMX_PADDDirm,      0 },
+    { X86::MMX_PADDQirr,      X86::MMX_PADDQirm,      0 },
+    { X86::MMX_PADDSBirr,     X86::MMX_PADDSBirm,     0 },
+    { X86::MMX_PADDSWirr,     X86::MMX_PADDSWirm,     0 },
+    { X86::MMX_PADDUSBirr,    X86::MMX_PADDUSBirm,    0 },
+    { X86::MMX_PADDUSWirr,    X86::MMX_PADDUSWirm,    0 },
+    { X86::MMX_PADDWirr,      X86::MMX_PADDWirm,      0 },
+    { X86::MMX_PALIGNR64irr,  X86::MMX_PALIGNR64irm,  0 },
+    { X86::MMX_PANDNirr,      X86::MMX_PANDNirm,      0 },
+    { X86::MMX_PANDirr,       X86::MMX_PANDirm,       0 },
+    { X86::MMX_PAVGBirr,      X86::MMX_PAVGBirm,      0 },
+    { X86::MMX_PAVGWirr,      X86::MMX_PAVGWirm,      0 },
+    { X86::MMX_PCMPEQBirr,    X86::MMX_PCMPEQBirm,    0 },
+    { X86::MMX_PCMPEQDirr,    X86::MMX_PCMPEQDirm,    0 },
+    { X86::MMX_PCMPEQWirr,    X86::MMX_PCMPEQWirm,    0 },
+    { X86::MMX_PCMPGTBirr,    X86::MMX_PCMPGTBirm,    0 },
+    { X86::MMX_PCMPGTDirr,    X86::MMX_PCMPGTDirm,    0 },
+    { X86::MMX_PCMPGTWirr,    X86::MMX_PCMPGTWirm,    0 },
+    { X86::MMX_PHADDSWrr64,   X86::MMX_PHADDSWrm64,   0 },
+    { X86::MMX_PHADDWrr64,    X86::MMX_PHADDWrm64,    0 },
+    { X86::MMX_PHADDrr64,     X86::MMX_PHADDrm64,     0 },
+    { X86::MMX_PHSUBDrr64,    X86::MMX_PHSUBDrm64,    0 },
+    { X86::MMX_PHSUBSWrr64,   X86::MMX_PHSUBSWrm64,   0 },
+    { X86::MMX_PHSUBWrr64,    X86::MMX_PHSUBWrm64,    0 },
+    { X86::MMX_PINSRWirri,    X86::MMX_PINSRWirmi,    0 },
+    { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+    { X86::MMX_PMADDWDirr,    X86::MMX_PMADDWDirm,    0 },
+    { X86::MMX_PMAXSWirr,     X86::MMX_PMAXSWirm,     0 },
+    { X86::MMX_PMAXUBirr,     X86::MMX_PMAXUBirm,     0 },
+    { X86::MMX_PMINSWirr,     X86::MMX_PMINSWirm,     0 },
+    { X86::MMX_PMINUBirr,     X86::MMX_PMINUBirm,     0 },
+    { X86::MMX_PMULHRSWrr64,  X86::MMX_PMULHRSWrm64,  0 },
+    { X86::MMX_PMULHUWirr,    X86::MMX_PMULHUWirm,    0 },
+    { X86::MMX_PMULHWirr,     X86::MMX_PMULHWirm,     0 },
+    { X86::MMX_PMULLWirr,     X86::MMX_PMULLWirm,     0 },
+    { X86::MMX_PMULUDQirr,    X86::MMX_PMULUDQirm,    0 },
+    { X86::MMX_PORirr,        X86::MMX_PORirm,        0 },
+    { X86::MMX_PSADBWirr,     X86::MMX_PSADBWirm,     0 },
+    { X86::MMX_PSHUFBrr64,    X86::MMX_PSHUFBrm64,    0 },
+    { X86::MMX_PSIGNBrr64,    X86::MMX_PSIGNBrm64,    0 },
+    { X86::MMX_PSIGNDrr64,    X86::MMX_PSIGNDrm64,    0 },
+    { X86::MMX_PSIGNWrr64,    X86::MMX_PSIGNWrm64,    0 },
+    { X86::MMX_PSLLDrr,       X86::MMX_PSLLDrm,       0 },
+    { X86::MMX_PSLLQrr,       X86::MMX_PSLLQrm,       0 },
+    { X86::MMX_PSLLWrr,       X86::MMX_PSLLWrm,       0 },
+    { X86::MMX_PSRADrr,       X86::MMX_PSRADrm,       0 },
+    { X86::MMX_PSRAWrr,       X86::MMX_PSRAWrm,       0 },
+    { X86::MMX_PSRLDrr,       X86::MMX_PSRLDrm,       0 },
+    { X86::MMX_PSRLQrr,       X86::MMX_PSRLQrm,       0 },
+    { X86::MMX_PSRLWrr,       X86::MMX_PSRLWrm,       0 },
+    { X86::MMX_PSUBBirr,      X86::MMX_PSUBBirm,      0 },
+    { X86::MMX_PSUBDirr,      X86::MMX_PSUBDirm,      0 },
+    { X86::MMX_PSUBQirr,      X86::MMX_PSUBQirm,      0 },
+    { X86::MMX_PSUBSBirr,     X86::MMX_PSUBSBirm,     0 },
+    { X86::MMX_PSUBSWirr,     X86::MMX_PSUBSWirm,     0 },
+    { X86::MMX_PSUBUSBirr,    X86::MMX_PSUBUSBirm,    0 },
+    { X86::MMX_PSUBUSWirr,    X86::MMX_PSUBUSWirm,    0 },
+    { X86::MMX_PSUBWirr,      X86::MMX_PSUBWirm,      0 },
+    { X86::MMX_PUNPCKHBWirr,  X86::MMX_PUNPCKHBWirm,  0 },
+    { X86::MMX_PUNPCKHDQirr,  X86::MMX_PUNPCKHDQirm,  0 },
+    { X86::MMX_PUNPCKHWDirr,  X86::MMX_PUNPCKHWDirm,  0 },
+    { X86::MMX_PUNPCKLBWirr,  X86::MMX_PUNPCKLBWirm,  0 },
+    { X86::MMX_PUNPCKLDQirr,  X86::MMX_PUNPCKLDQirm,  0 },
+    { X86::MMX_PUNPCKLWDirr,  X86::MMX_PUNPCKLWDirm,  0 },
+    { X86::MMX_PXORirr,       X86::MMX_PXORirm,       0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::VCVTSD2SSrr,       X86::VCVTSD2SSrm,       0 },
     { X86::Int_VCVTSD2SSrr,   X86::Int_VCVTSD2SSrm,   0 },
diff --git a/test/CodeGen/X86/mmx-fold-load.ll b/test/CodeGen/X86/mmx-fold-load.ll
index 79029b4cc47..d49edac8c5f 100644
--- a/test/CodeGen/X86/mmx-fold-load.ll
+++ b/test/CodeGen/X86/mmx-fold-load.ll
@@ -135,3 +135,148 @@ entry:
   ret i64 %4
 }
 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
+
+define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt0:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddb (%[[REG3:[a-z]+]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare void @llvm.x86.mmx.emms()
+
+define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt1:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+
+define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt2:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddd (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+
+define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt3:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddq (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+
+define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt4:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddusb (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
+
+define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt5:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddusw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+
+define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt6:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrlw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)
+
+define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt7:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrld (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)
+
+define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt8:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrlq (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/vec_extract-mmx.ll b/test/CodeGen/X86/vec_extract-mmx.ll
index c7780b02376..6b467b83efa 100644
--- a/test/CodeGen/X86/vec_extract-mmx.ll
+++ b/test/CodeGen/X86/vec_extract-mmx.ll
@@ -49,8 +49,7 @@ entry:
 define i32 @test2(i32* nocapture readonly %ptr) {
 ; CHECK-LABEL: test2:
 ; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: movq (%rdi), %mm0
-; CHECK-NEXT: pshufw $232, %mm0, %mm0
+; CHECK-NEXT: pshufw $232, (%rdi), %mm0
 ; CHECK-NEXT: movd %mm0, %eax
 ; CHECK-NEXT: emms
 ; CHECK-NEXT: retq
-- 
2.34.1