From: Bruno Cardoso Lopes
Date: Wed, 25 Feb 2015 15:14:02 +0000 (+0000)
Subject: [X86][MMX] Reapply: Add MMX instructions to foldable tables
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=51fc7f5afa5bf0d194c4928bcc948aa5c0a27c0d;p=oota-llvm.git

[X86][MMX] Reapply: Add MMX instructions to foldable tables

Reapply r230248.

Teach the peephole optimizer to work with MMX instructions by adding
entries into the foldable tables. This covers folding opportunities not
handled during isel.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230499 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a543b4a9733..f5b9680d0bb 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -547,6 +547,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
     { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
 
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
+    { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
+    { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
+    { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
+    { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
+    { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
+    { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
+    { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
+    { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
+    { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
     { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
@@ -1117,6 +1129,78 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
     { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
 
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
+    { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
+    { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
+    { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
+    { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
+    { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
+    { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
+    { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
+    { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
+    { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
+    { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
+    { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
+    { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
+    { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
+    { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
+    { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
+    { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
+    { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
+    { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
+    { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
+    { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
+    { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
+    { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
+    { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
+    { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
+    { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
+    { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
+    { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
+    { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
+    { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
+    { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+    { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
+    { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
+    { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
+    { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
+    { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
+    { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
+    { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
+    { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
+    { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
+    { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
+    { X86::MMX_PORirr, X86::MMX_PORirm, 0 },
+    { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
+    { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
+    { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
+    { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
+    { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
+    { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
+    { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
+    { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
+    { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
+    { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
+    { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
+    { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
+    { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
+    { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
+    { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
+    { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
+    { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
+    { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
+    { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
+    { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
+    { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
+    { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
+    { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
+    { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
+    { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
+    { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
+    { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
+    { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
+
     // AVX 128-bit versions of foldable instructions
     { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
     { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
diff --git a/test/CodeGen/X86/mmx-fold-load.ll b/test/CodeGen/X86/mmx-fold-load.ll
index 79029b4cc47..d49edac8c5f 100644
--- a/test/CodeGen/X86/mmx-fold-load.ll
+++ b/test/CodeGen/X86/mmx-fold-load.ll
@@ -135,3 +135,148 @@ entry:
   ret i64 %4
 }
 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
+
+define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt0:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddb (%[[REG3:[a-z]+]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare void @llvm.x86.mmx.emms()
+
+define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt1:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+
+define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt2:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddd (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+
+define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt3:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddq (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+
+define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt4:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddusb (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
+
+define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt5:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddusw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+
+define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt6:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrlw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)
+
+define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt7:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrld (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)
+
+define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt8:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrlq (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+  %v = load x86_mmx* %q
+  %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
+  %s = bitcast x86_mmx %u to i64
+  call void @llvm.x86.mmx.emms()
+  ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/vec_extract-mmx.ll b/test/CodeGen/X86/vec_extract-mmx.ll
index c7780b02376..6b467b83efa 100644
--- a/test/CodeGen/X86/vec_extract-mmx.ll
+++ b/test/CodeGen/X86/vec_extract-mmx.ll
@@ -49,8 +49,7 @@ entry:
 define i32 @test2(i32* nocapture readonly %ptr) {
 ; CHECK-LABEL: test2:
 ; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: movq (%rdi), %mm0
-; CHECK-NEXT: pshufw $232, %mm0, %mm0
+; CHECK-NEXT: pshufw $232, (%rdi), %mm0
 ; CHECK-NEXT: movd %mm0, %eax
 ; CHECK-NEXT: emms
 ; CHECK-NEXT: retq
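
As a rough stand-alone sketch of what these table rows buy the peephole optimizer: each row pairs a register-form opcode with its memory-form twin, so when the only use of a loaded value is an instruction whose register form appears in the table, the pass can rewrite that use to the memory form and delete the load. The struct, function, and string opcodes below are invented for illustration and are not LLVM APIs; in the tree the real tables are the arrays this diff extends in X86InstrInfo.cpp, consumed by X86InstrInfo's memory-operand folding hooks (e.g. foldMemoryOperandImpl).

// Stand-alone sketch of the "foldable table" idea (illustrative only;
// none of these names are LLVM APIs).
#include <cstdio>
#include <optional>
#include <string>
#include <vector>

// A toy "machine instruction": an opcode plus operands kept as strings.
struct MI {
  std::string Opcode;
  std::vector<std::string> Ops;
};

// One folding-table row: register form -> memory form. The third column
// in the real tables carries alignment/flag bits; the MMX rows added by
// this patch all pass 0 there.
struct FoldEntry {
  std::string RegForm;
  std::string MemForm;
  unsigned Flags;
};

static const FoldEntry FoldTable[] = {
    {"MMX_PADDBirr", "MMX_PADDBirm", 0},
    {"MMX_PSHUFWri", "MMX_PSHUFWmi", 0},
};

// If the register defined by `Load` feeds `User` and `User` has a memory
// form in the table, return the folded instruction that reads the load's
// address directly (the caller would then erase the load).
std::optional<MI> foldLoad(const MI &Load, const MI &User) {
  for (const FoldEntry &E : FoldTable) {
    if (User.Opcode != E.RegForm)
      continue;
    MI Folded = User;
    Folded.Opcode = E.MemForm;
    for (std::string &Op : Folded.Ops)
      if (Op == Load.Ops[0]) // Ops[0]: register the load defines
        Op = Load.Ops[1];    // Ops[1]: address it loads from
    return Folded;
  }
  return std::nullopt; // no table entry: the load stays separate
}

int main() {
  MI Load{"MMX_MOVQ64rm", {"%mm1", "(%rdi)"}};
  MI Add{"MMX_PADDBirr", {"%mm0", "%mm1"}};
  if (auto Folded = foldLoad(Load, Add))
    std::printf("folded to %s %s, %s\n", Folded->Opcode.c_str(),
                Folded->Ops[1].c_str(), Folded->Ops[0].c_str());
  return 0;
}

Built with -std=c++17 this prints "folded to MMX_PADDBirm (%rdi), %mm0", the same shape of rewrite the new tt0 test expects: a paddb with a memory operand instead of a separate movq followed by a register-register paddb.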