//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "x86-codegen"
+
STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP , "Number of floating point instructions");
memset(RegMap, 0, sizeof(RegMap));
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<EdgeBundles>();
AU.addPreservedID(MachineLoopInfoID);
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+ const char *getPassName() const override { return "X86 FP Stackifier"; }
private:
const TargetInstrInfo *TII; // Machine instruction info.
EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
- unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
unsigned Mask = 0;
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I) {
- unsigned Reg = *I - X86::FP0;
- if (Reg < 8)
- Mask |= 1 << Reg;
+ unsigned Reg = *I;
+ if (Reg < X86::FP0 || Reg > X86::FP6)
+ continue;
+ Mask |= 1 << (Reg - X86::FP0);
}
return Mask;
}
// The hardware keeps track of how many FP registers are live, so we have
// to model that exactly. Usually, each live register corresponds to an
// FP<n> register, but when dealing with calls, returns, and inline
- // assembly, it is sometimes neccesary to have live scratch registers.
+ // assembly, it is sometimes necessary to have live scratch registers.
unsigned Stack[8]; // FP<n> Registers in each stack slot...
unsigned StackTop; // The current top of the FP stack.
// Shuffle live registers to match the expectations of successor blocks.
void finishBlockStack();
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dumpStack() const {
dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
dbgs() << "\n";
}
+#endif
/// getSlot - Return the stack slot number a particular register number is
/// in.
}
/// getScratchReg - Return an FP register that is not currently in use.
- unsigned getScratchReg() {
+ unsigned getScratchReg() const {
for (int i = NumFPRegs - 1; i >= 8; --i)
if (!isLive(i))
return i;
}
/// isScratchReg - Returns trus if RegNo is a scratch FP register.
- bool isScratchReg(unsigned RegNo) {
+ static bool isScratchReg(unsigned RegNo) {
return RegNo > 8 && RegNo < NumFPRegs;
}
/// getSTReg - Return the X86::ST(i) register which contains the specified
/// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
- return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ return StackTop - 1 - getSlot(RegNo) + X86::ST0;
}
// pushReg - Push the specified FP<n> register onto the stack.
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
+ /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
+ /// RegNo. If any PendingST registers still need the RegNo value, duplicate
+ /// them to new scratch registers.
+ void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
+ for (unsigned i = 0; i != NumPendingSTs; ++i) {
+ if (PendingST[i] != RegNo)
+ continue;
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Duplicating pending ST" << i
+ << " in FP" << RegNo << " to FP" << SR << '\n');
+ duplicateToTop(RegNo, SR, I);
+ PendingST[i] = SR;
+ }
+ }
+
/// popStackAfter - Pop the current value off of the top of the FP stack
/// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
void handleSpecialFP(MachineBasicBlock::iterator &I);
// Check if a COPY instruction is using FP registers.
- bool isFPCopy(MachineInstr *MI) {
+ static bool isFPCopy(MachineInstr *MI) {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SrcReg = MI->getOperand(1).getReg();
if (MI->isCopy() && isFPCopy(MI))
FPInstClass = X86II::SpecialFP;
+ if (MI->isImplicitDef() &&
+ X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
- MachineInstr *PrevMI = 0;
+ MachineInstr *PrevMI = nullptr;
if (I != BB.begin())
- PrevMI = prior(I);
+ PrevMI = std::prev(I);
++NumFP; // Keep track of # of pseudo instrs
DEBUG(dbgs() << "\nFPInst:\t" << *MI);
} else {
MachineBasicBlock::iterator Start = I;
// Rewind to first instruction newly inserted.
- while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
dbgs() << "Inserted instructions:\n\t";
Start->print(dbgs(), &MF.getTarget());
- while (++Start != llvm::next(I)) {}
+ while (++Start != std::next(I)) {}
}
dumpStack();
);
+ (void)PrevMI;
Changed = true;
}
namespace {
struct TableEntry {
- unsigned from;
- unsigned to;
+ uint16_t from;
+ uint16_t to;
bool operator<(const TableEntry &TE) const { return from < TE.from; }
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
- const TableEntry &TE) {
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
+ const TableEntry &TE) {
return V < TE.from;
}
};
// Produce implicit-defs for free by using killed registers.
while (Kills && Defs) {
- unsigned KReg = CountTrailingZeros_32(Kills);
- unsigned DReg = CountTrailingZeros_32(Defs);
+ unsigned KReg = countTrailingZeros(Kills);
+ unsigned DReg = countTrailingZeros(Defs);
DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
std::swap(RegMap[KReg], RegMap[DReg]);
// Kill registers by popping.
if (Kills && I != MBB->begin()) {
- MachineBasicBlock::iterator I2 = llvm::prior(I);
- for (;;) {
+ MachineBasicBlock::iterator I2 = std::prev(I);
+ while (StackTop) {
unsigned KReg = getStackEntry(0);
if (!(Kills & (1 << KReg)))
break;
// Manually kill the rest.
while (Kills) {
- unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned KReg = countTrailingZeros(Kills);
DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
freeStackSlotBefore(I, KReg);
Kills &= ~(1 << KReg);
// Load zeros for all the imp-defs.
while(Defs) {
- unsigned DReg = CountTrailingZeros_32(Defs);
+ unsigned DReg = countTrailingZeros(Defs);
DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
pushReg(DReg);
// Change from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(0); // Remove the explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// Result gets pushed on the stack.
pushReg(DestReg);
}
unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+ if (KillsSrc)
+ duplicatePendingSTBeforeKill(Reg, I);
+
// FISTP64m is strange because there isn't a non-popping versions.
// If we have one _and_ we don't want to pop the operand, duplicate the value
// on the stack instead of moving it. This ensure that popping the value is
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
-
+
// Convert from the pseudo instruction to the concrete instruction.
MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(Reg, I);
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
MI->RemoveOperand(1);
MI->getOperand(0).setReg(getSTReg(Op1));
MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
-
+
// If we kill the second operand, make sure to pop it from the stack.
if (Op0 != Op1 && KillsOp1) {
// Get this value off of the register stack.
// When the source is killed, allocate a scratch FP register.
if (KillsSrc) {
+ duplicatePendingSTBeforeKill(SrcFP, I);
unsigned Slot = getSlot(SrcFP);
unsigned SR = getScratchReg();
PendingST[DstST] = SR;
break;
}
+ case TargetOpcode::IMPLICIT_DEF: {
+ // All FP registers must be explicitly defined, so load a 0 instead.
+ unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
+ DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(Reg);
+ break;
+ }
+
case X86::FpPOP_RETVAL: {
// The FpPOP_RETVAL instruction is used after calls that return a value on
// the floating point stack. We cannot model this with ST defs since CALL
}
if (STUses && !isMask_32(STUses))
- report_fatal_error("Inline asm fixed inputs"
- " must be last on the x87 stack");
+ MI->emitError("fixed input regs must be last on the x87 stack");
unsigned NumSTUses = CountTrailingOnes_32(STUses);
// Defs must be contiguous from the stack top. ST0-STn.
- if (STDefs && !isMask_32(STDefs))
- report_fatal_error("Inline asm fixed outputs"
- " must be last on the x87 stack");
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
// So must the clobbered stack slots. ST0-STm, m >= n.
if (STClobbers && !isMask_32(STDefs | STClobbers))
- report_fatal_error("Inline asm clobbers must be last on the x87 stack");
+ MI->emitError("clobbers must be last on the x87 stack");
// Popped inputs are the ones that are also clobbered or defined.
unsigned STPopped = STUses & (STDefs | STClobbers);
if (STPopped && !isMask_32(STPopped))
- report_fatal_error("Inline asm popped inputs"
- " must be last on the x87 stack");
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
if (!Op.isUse())
- report_fatal_error("Illegal \"f\" output constraint in inline asm");
+ MI->emitError("illegal \"f\" output constraint");
unsigned FPReg = getFPReg(Op);
FPUsed |= 1U << FPReg;
// Note: this might be a non-optimal pop sequence. We might be able to do
// better by trying to pop in stack order or something.
while (FPKills) {
- unsigned FPReg = CountTrailingZeros_32(FPKills);
+ unsigned FPReg = countTrailingZeros(FPKills);
if (isLive(FPReg))
freeStackSlotAfter(InsertPt, FPReg);
FPKills &= ~(1U << FPReg);
return;
}
- case X86::RET:
- case X86::RETI:
+ case X86::WIN_FTOL_32:
+ case X86::WIN_FTOL_64: {
+ // Push the operand into ST0.
+ MachineOperand &Op = MI->getOperand(0);
+ assert(Op.isUse() && Op.isReg() &&
+ Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
+ unsigned FPReg = getFPReg(Op);
+ if (Op.isKill())
+ moveToTop(FPReg, I);
+ else
+ duplicateToTop(FPReg, FPReg, I);
+
+ // Emit the call. This will pop the operand.
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("_ftol2")
+ .addReg(X86::ST0, RegState::ImplicitKill)
+ .addReg(X86::ECX, RegState::ImplicitDefine)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EDX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ --StackTop;
+
+ break;
+ }
+
+ case X86::RETQ:
+ case X86::RETL:
+ case X86::RETIL:
+ case X86::RETIQ:
// If RET has an FP register use operand, pass the first one in ST(0) and
// the second one in ST(1).
// Assert that the top of stack contains the right FP register.
assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
"Top of stack not the right register for RET!");
-
+
// Ok, everything is good, mark the value as not being on the stack
// anymore so that our assertion about the stack being empty at end of
// block doesn't fire.
StackTop = 0;
return;
}
-
+
// Otherwise, we are returning two values:
// 2) If returning the same value for both, we only have one thing in the FP
// stack. Consider: RET FP1, FP1
if (StackTop == 1) {
assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
"Stack misconfiguration for RET!");
-
+
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
-
+
/// Okay we know we have two different FPx operands now:
assert(StackTop == 2 && "Must have two values live!");
-
+
/// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
/// in ST(1). In this case, emit an fxch.
if (getStackEntry(0) == SecondFPRegOp) {
assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
moveToTop(FirstFPRegOp, MI);
}
-
+
/// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
/// ST(1). Just remove both from our understanding of the stack and return.
assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");