From 45de191b0bb337142a3f2a09f5f7410844e46a81 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Thu, 2 Dec 2004 18:17:31 +0000
Subject: [PATCH] Spill/restore X86 floating point stack registers with 64-bits of precision instead of 80-bits of precision. This fixes PR467.

This change speeds up fldry on X86 with LLC from 7.32s on apoc to 4.68s.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@18433 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86RegisterInfo.cpp | 9 +++++----
 lib/Target/X86/X86RegisterInfo.td | 10 ++++++++--
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a90e840e283..643c5972f55 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -50,7 +50,8 @@ static unsigned getIdx(unsigned SpillSize) {
   case 8: return 0;
   case 16: return 1;
   case 32: return 2;
-  case 80: return 3;
+  case 64: return 3; // FP in 64-bit spill mode.
+  case 80: return 4; // FP in 80-bit spill mode.
   }
 }
 
@@ -58,7 +59,7 @@ void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           unsigned SrcReg, int FrameIdx) const {
   static const unsigned Opcode[] =
-    { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FSTP80m };
+    { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST64m, X86::FSTP80m };
   unsigned Idx = getIdx(getSpillSize(SrcReg));
   addFrameReference(BuildMI(MBB, MI, Opcode[Idx], 5), FrameIdx).addReg(SrcReg);
 }
@@ -67,7 +68,7 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                            unsigned DestReg, int FrameIdx)const{
   static const unsigned Opcode[] =
-    { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD80m };
+    { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD64m, X86::FLD80m };
   unsigned Idx = getIdx(getSpillSize(DestReg));
   addFrameReference(BuildMI(MBB, MI, Opcode[Idx], 4, DestReg), FrameIdx);
 }
@@ -77,7 +78,7 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *RC) const {
   static const unsigned Opcode[] =
-    { X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV };
+    { X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::FpMOV };
   BuildMI(MBB, MI, Opcode[getIdx(RC->getSize()*8)], 1, DestReg).addReg(SrcReg);
 }
 
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 628a239d014..1d86c0e6437 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -84,12 +84,18 @@ def R32 : RegisterClass {
   }];
 }
 
-def RFP : RegisterClass;
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP : RegisterClass;
 
 // Floating point stack registers (these are not allocatable by the
 // register allocator - the floating point stackifier is responsible
 // for transforming FPn allocations to STn registers)
-def RST : RegisterClass {
+def RST : RegisterClass {
   let Methods = [{
     iterator allocation_order_end(MachineFunction &MF) const {
       return begin();
-- 
2.34.1