From 796a06d4eb49fd8b99973790951e4cf9d15513ad Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 8 Jul 2015 08:07:57 +0000 Subject: [PATCH] [X86][SSE] Added (V)ROUNDSD + (V)ROUNDSS stack folding support git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241671 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 12 ++++++++---- test/CodeGen/X86/stack-folding-fp-avx1.ll | 18 ++++++++++++++++-- test/CodeGen/X86/stack-folding-fp-sse42.ll | 22 ++++++++++++++++++++-- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5944ae097dd..dd2d04dbe49 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1107,6 +1107,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::ROUNDSDr, X86::ROUNDSDm, 0 }, + { X86::ROUNDSSr, X86::ROUNDSSm, 0 }, { X86::SBB32rr, X86::SBB32rm, 0 }, { X86::SBB64rr, X86::SBB64rm, 0 }, { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, @@ -1403,6 +1405,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VROUNDSDr, X86::VROUNDSDm, 0 }, + { X86::VROUNDSSr, X86::VROUNDSSm, 0 }, { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, @@ -6395,7 +6399,7 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { hasVirtualRegDefsInBasicBlock(*MI1, MBB) && MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) return true; - + return false; } @@ -6500,7 +6504,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); MachineOperand &OpC = Root.getOperand(0); - + unsigned RegA = OpA.getReg(); unsigned RegB = OpB.getReg(); unsigned RegX = OpX.getReg(); @@ -6535,7 +6539,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)); InsInstrs.push_back(MIB1); - + MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) @@ -6567,7 +6571,7 @@ void X86InstrInfo::genAlternativeCodeSequence( Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); } assert(Prev && "Unknown pattern for machine combiner"); - + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); return; } diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll index c7c1fc94638..63aa742bdf0 100644 --- a/test/CodeGen/X86/stack-folding-fp-avx1.ll +++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll @@ -1409,12 +1409,26 @@ define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) { } declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone -; TODO stack_fold_roundsd +define double @stack_fold_roundsd(double %a0) optsize { + ;CHECK-LABEL: stack_fold_roundsd + ;CHECK: vroundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call double @llvm.floor.f64(double %a0) + ret double %2 +} +declare double @llvm.floor.f64(double) nounwind readnone ; TODO stack_fold_roundsd_int declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone -; TODO stack_fold_roundss +define float @stack_fold_roundss(float %a0) optsize { + ;CHECK-LABEL: stack_fold_roundss + ;CHECK: vroundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call float @llvm.floor.f32(float %a0) + ret float %2 +} +declare float @llvm.floor.f32(float) nounwind readnone ; TODO stack_fold_roundss_int declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll index 1fd8707092d..f9fcbaabdeb 100644 --- a/test/CodeGen/X86/stack-folding-fp-sse42.ll +++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll @@ -884,11 +884,29 @@ define <4 x float> @stack_fold_roundps(<4 x float> %a0) { } declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone -; TODO stack_fold_roundsd +define double @stack_fold_roundsd(double %a0) optsize { + ;CHECK-LABEL: stack_fold_roundsd + ;CHECK: roundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call double @llvm.floor.f64(double %a0) + ret double %2 +} +declare double @llvm.floor.f64(double) nounwind readnone + ; TODO stack_fold_roundsd_int +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + +define float @stack_fold_roundss(float %a0) optsize { + ;CHECK-LABEL: stack_fold_roundss + ;CHECK: roundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call float @llvm.floor.f32(float %a0) + ret float %2 +} +declare float @llvm.floor.f32(float) nounwind readnone -; TODO stack_fold_roundss ; TODO stack_fold_roundss_int +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone ; TODO stack_fold_rsqrtps -- 2.34.1