}
}
+static unsigned getLEArOpcode(unsigned is64Bit) {
+ return is64Bit ? X86::LEA64r : X86::LEA32r;
+}
+
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worry about clobbering it.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ bool Is64Bit, bool UseLEA,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub ?
- getSUBriOpcode(Is64Bit, Offset) :
- getADDriOpcode(Is64Bit, Offset);
+ unsigned Opc;
+ if (UseLEA)
+ Opc = getLEArOpcode(Is64Bit);
+ else
+ Opc = isSub
+ ? getSUBriOpcode(Is64Bit, Offset)
+ : getADDriOpcode(Is64Bit, Offset);
+
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
}
}
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
+ MachineInstr *MI = NULL;
+
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ StackPtr, false, isSub ? -ThisVal : ThisVal);
+ } else {
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
if (isSub)
MI->setFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
Offset -= ThisVal;
}
}
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr) {
if (NumBytes)
*NumBytes += PI->getOperand(2).getImm();
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
+/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the
+/// stack adjustment is returned as a positive value for ADD/LEA and a negative for
/// SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int Offset = 0;
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr){
Offset += PI->getOperand(2).getImm();
MBB.erase(PI);
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
// Encode the registers in the order they were saved, 3-bits per register. The
- // registers are numbered from 1 to 6.
+ // registers are numbered from 1 to CU_NUM_SAVED_REGS.
uint32_t RegEnc = 0;
- for (int I = 0; I != 6; --I) {
+ for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
unsigned Reg = SavedRegs[I];
- if (Reg == 0) break;
+ if (Reg == 0) continue;
+
int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
- if (CURegNum == -1)
- return ~0U;
+ if (CURegNum == -1) return ~0U;
// Encode the 3-bit register number in order, skipping over 3-bits for each
// register.
- RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3);
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
}
- assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+ assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
return RegEnc;
}
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
bool IsWin64 = STI.isTargetWin64();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (isEAXAlive) {
// Restore EAX
}
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// We cannot use LEA here, because stack pointer was realigned. We need to
// deallocate local frame back.
if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
+ *RegInfo);
MBBI = prior(LastCSPop);
}
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
}
// We're returning from function via eh_return.
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
}
}
return false;
}
+
+/// GetScratchRegister - Get a register for performing work in the segmented
+/// stack prologue. Depending on platform and the properties of the function
+/// either one or two registers will be needed. Set primary to true for
+/// the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
- if (Is64Bit) {
- return X86::R11;
- } else {
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
- bool IsNested = HasNestArgument(&MF);
-
- if (CallingConvention == CallingConv::X86_FastCall) {
- if (IsNested) {
- report_fatal_error("Segmented stacks does not support fastcall with "
- "nested function.");
- return -1;
- } else {
- return X86::EAX;
- }
- } else {
- if (IsNested)
- return X86::EDX;
- else
- return X86::ECX;
- }
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ if (Is64Bit)
+ return Primary ? X86::R11 : X86::R12;
+
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall ||
+ CallingConvention == CallingConv::Fast) {
+ if (IsNested)
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return Primary ? X86::EAX : X86::ECX;
}
+ if (IsNested)
+ return Primary ? X86::EDX : X86::EAX;
+ return Primary ? X86::ECX : X86::EAX;
}
// The stack limit in the TCB is set to this many bytes above the actual stack
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux())
- report_fatal_error("Segmented stacks supported only on linux.");
+ if (!ST->isTargetLinux() && !ST->isTargetDarwin() &&
+ !ST->isTargetWin32() && !ST->isTargetFreeBSD())
+ report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
// prologue.
StackSize = MFI->getStackSize();
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- TlsReg = X86::FS;
- TlsOffset = 0x70;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ } else if (ST->isTargetFreeBSD()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x18;
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::RSP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
- TlsReg = X86::GS;
- TlsOffset = 0x30;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x48 + 90*4;
+ } else if (ST->isTargetWin32()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x14; // pvArbitrary, reserved for application use
+ } else if (ST->isTargetFreeBSD()) {
+ report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::ESP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+ if (ST->isTargetLinux() || ST->isTargetWin32()) {
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (ST->isTargetDarwin()) {
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+ BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
// On 32 bit we first push the arguments size and then the frame size. On 64
// bit, we pass the stack frame size in r10 and the argument size in r11.