+ duplicateToTop(SrcFP, DstFP, I);
+ }
+ break;
+ }
+
+ case TargetOpcode::IMPLICIT_DEF: {
+ // All FP registers must be explicitly defined, so load a 0 instead.
+ unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
+ DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(Reg);
+ break;
+ }
+
+ case X86::FpPOP_RETVAL: {
+ // The FpPOP_RETVAL instruction is used after calls that return a value on
+ // the floating point stack. We cannot model this with ST defs since CALL
+ // instructions have fixed clobber lists. This instruction is interpreted
+ // to mean that there is one more live register on the stack than we
+ // thought.
+ //
+ // This means that StackTop does not match the hardware stack between a
+ // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
+ // between CALL and FpPOP_RETVAL as long as they don't overflow the
+ // hardware stack.
+ unsigned DstFP = getFPReg(MI->getOperand(0));
+
+ // Move existing stack elements up to reflect reality.
+ assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+ if (StackTop) {
+ std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ ++RegMap[i];
+ }
+ ++StackTop;
+
+ // DstFP is the new bottom of the stack.
+ Stack[0] = DstFP;
+ RegMap[DstFP] = 0;
+
+ // DstFP will be killed by processBasicBlock if this was a dead def.
+ break;
+ }
+
+ case TargetOpcode::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr.
+ //
+ // There are special rules for x87 inline assembly. The compiler must know
+ // exactly how many registers are popped and pushed implicitly by the asm.
+ // Otherwise it is not possible to restore the stack state after the inline
+ // asm.
+ //
+ // There are 3 kinds of input operands:
+ //
+ // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+ // popped input operand must be in a fixed stack slot, and it is either
+ // tied to an output operand, or in the clobber list. The MI has ST use
+ // and def operands for these inputs.
+ //
+ // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+ // preserved by the inline asm. The fixed stack slots must be STn-STm
+ // following the popped inputs. A fixed input operand cannot be tied to
+ // an output or appear in the clobber list. The MI has ST use operands
+ // and no defs for these inputs.
+ //
+ // 3. Preserved inputs. These inputs use the "f" constraint which is
+ // represented as an FP register. The inline asm won't change these
+ // stack slots.
+ //
+ // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+ // registers do not count as output operands. The inline asm changes the
+ // stack as if it popped all the popped inputs and then pushed all the
+ // output operands.
+
+ // Scan the assembly for ST registers used, defined and clobbered. We can
+ // only tell clobbers from defs by looking at the asm descriptor.
+ unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+ unsigned NumOps = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+ unsigned Flags = MI->getOperand(i).getImm();
+ NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumOps != 1)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i + 1);
+ if (!MO.isReg())
+ continue;
+ unsigned STReg = MO.getReg() - X86::ST0;
+ if (STReg >= 8)
+ continue;
+
+ switch (InlineAsm::getKind(Flags)) {
+ case InlineAsm::Kind_RegUse:
+ STUses |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ STDefs |= (1u << STReg);
+ if (MO.isDead())
+ STDeadDefs |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_Clobber:
+ STClobbers |= (1u << STReg);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (STUses && !isMask_32(STUses))
+ MI->emitError("fixed input regs must be last on the x87 stack");
+ unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+ // Defs must be contiguous from the stack top. ST0-STn.
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
+ unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+ // So must the clobbered stack slots. ST0-STm, m >= n.
+ if (STClobbers && !isMask_32(STDefs | STClobbers))
+ MI->emitError("clobbers must be last on the x87 stack");
+
+ // Popped inputs are the ones that are also clobbered or defined.
+ unsigned STPopped = STUses & (STDefs | STClobbers);
+ if (STPopped && !isMask_32(STPopped))
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
+ unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+ DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+ << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+ // Scan the instruction for FP uses corresponding to "f" constraints.
+ // Collect FP registers to kill afer the instruction.
+ // Always kill all the scratch regs.
+ unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+ unsigned FPUsed = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ if (!Op.isUse())
+ MI->emitError("illegal \"f\" output constraint");
+ unsigned FPReg = getFPReg(Op);
+ FPUsed |= 1U << FPReg;
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ FPKills |= 1U << FPReg;
+ }
+
+ // The popped inputs will be killed by the instruction, so duplicate them
+ // if the FP register needs to be live after the instruction, or if it is
+ // used in the instruction itself. We effectively treat the popped inputs
+ // as early clobbers.
+ for (unsigned i = 0; i < NumSTPopped; ++i) {
+ if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+ continue;
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+ PendingST[i] = SR;
+ }
+
+ // Make sure we have a unique live register for every fixed use. Some of
+ // them could be undef uses, and we need to emit LD_F0 instructions.
+ for (unsigned i = 0; i < NumSTUses; ++i) {
+ if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+ // Check for shared assignments.
+ for (unsigned j = 0; j < i; ++j) {
+ if (PendingST[j] != PendingST[i])
+ continue;
+ // STi and STj are inn the same register, create a copy.
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i])
+ << " to avoid collision with ST" << j << '\n');
+ PendingST[i] = SR;
+ }
+ continue;
+ }
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(SR);
+ PendingST[i] = SR;
+ if (NumPendingSTs == i)
+ ++NumPendingSTs;