//===-- X86ISelSimple.cpp - A simple instruction selector for x86 ---------===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines a simple peephole instruction selector for the x86 target
/// the entire function.
///
bool runOnFunction(Function &Fn) {
+ // Lazily create a stack slot for the return address if needed.
+ ReturnAddressIndex = 0;
+
// First pass over the function, lower any unknown intrinsic functions
// with the IntrinsicLowering class.
LowerUnknownIntrinsicFunctionCalls(Fn);
BB = &F->front();
- // Set up a frame object for the return address. This is used by the
- // llvm.returnaddress & llvm.frameaddress intrinisics.
- ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);
-
// Copy incoming arguments off of the stack...
LoadArgumentsToVirtualRegs(Fn);
+ // If this is main, emit special code.
+ if (Fn.hasExternalLinkage() && Fn.getName() == "main")
+ EmitSpecialCodeForMain();
+
// Instruction select everything except PHI nodes
visit(Fn);
return "X86 Simple Instruction Selection";
}
+ /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+ /// the main function.
+ void EmitSpecialCodeForMain();
+
/// visitBasicBlock - This method is called when we are visiting a new basic
/// block. This simply creates a new MachineBasicBlock to emit code into
/// and adds it to the current MachineFunction. Subsequent visit* for
MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI);
void visitSelectInst(SelectInst &SI);
-
-
+
+
// Memory Instructions
void visitLoadInst(LoadInst &I);
void visitStoreInst(StoreInst &I);
void visitAllocaInst(AllocaInst &I);
void visitMallocInst(MallocInst &I);
void visitFreeInst(FreeInst &I);
-
+
// Other operators
void visitShiftInst(ShiftInst &I);
void visitPHINode(PHINode &I) {} // PHI nodes handled by second pass
void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned Op0Reg, unsigned Op1Reg);
- void doMultiplyConst(MachineBasicBlock *MBB,
+ void doMultiplyConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, const Type *DestTy,
unsigned Op0Reg, unsigned Op1Val);
// Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
// constant.
- void doSHLDConst(MachineBasicBlock *MBB,
+ void doSHLDConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
unsigned Op1Val);
-
+
/// emitSelectOperation - Common code shared between visitSelectInst and the
/// constant expression support.
void emitSelectOperation(MachineBasicBlock *MBB,
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
// Do not emit noop casts at all, unless it's a double -> float cast.
if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()) &&
- (CI->getType() != Type::FloatTy ||
+ (CI->getType() != Type::FloatTy ||
CI->getOperand(0)->getType() != Type::DoubleTy))
return getReg(CI->getOperand(0), MBB, IPt);
} else if (AllocaInst *AI = dyn_castFixedAlloca(V)) {
unsigned TySize = TM.getTargetData().getTypeSize(Ty);
TySize *= CUI->getValue(); // Get total allocated size...
unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty);
-
+
// Create a new stack object using the frame manager...
int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
AllocaMap.insert(I, std::make_pair(AI, FrameIdx));
BuildMI(*MBB, IP, X86::FLD0, 0, R);
else if (CFP->isExactlyValue(+1.0))
BuildMI(*MBB, IP, X86::FLD1, 0, R);
- else {
- // Otherwise we need to spill the constant to memory...
+ else if (CFP->isExactlyValue(-0.0)) {
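+ // FCHS flips the sign bit of the value just loaded, so -0.0 and -1.0
+ // come from FLD0/FLD1 plus FCHS with no constant-pool traffic.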
+ unsigned Tmp = makeAnotherReg(Type::DoubleTy);
+ BuildMI(*MBB, IP, X86::FLD0, 0, Tmp);
+ BuildMI(*MBB, IP, X86::FCHS, 1, R).addReg(Tmp);
+ } else if (CFP->isExactlyValue(-1.0)) {
+ unsigned Tmp = makeAnotherReg(Type::DoubleTy);
+ BuildMI(*MBB, IP, X86::FLD1, 0, Tmp);
+ BuildMI(*MBB, IP, X86::FCHS, 1, R).addReg(Tmp);
+ } else { // FIXME: PI, other native values
+ // FIXME: 2*PI -> LDPI + FADD
+
+ // Otherwise we need to spill the constant to memory.
MachineConstantPool *CP = F->getConstantPool();
- unsigned CPI = CP->getConstantPoolIndex(CFP);
+
const Type *Ty = CFP->getType();
+ // If an FP immediate is precise when represented as a float, we put it
+ // into the constant pool as a float, even if it is statically typed as
+ // a double.
+ if (Ty == Type::DoubleTy)
+ if (CFP->isExactlyValue((float)CFP->getValue())) {
+ Ty = Type::FloatTy;
+ CFP = cast<ConstantFP>(ConstantExpr::getCast(CFP, Ty));
+ }
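+ // FLD32m widens the value back to full precision at load time, so the
+ // smaller pool entry costs nothing at run time.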
+
+ unsigned CPI = CP->getConstantPoolIndex(CFP);
+
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m;
addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI);
// [ESP] -- return address
// [ESP + 4] -- first argument (leftmost lexically)
// [ESP + 8] -- second argument, if first argument is four bytes in size
- // ...
+ // ...
//
unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
MachineFrameInfo *MFI = F->getFrameInfo();
- for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I) {
bool ArgLive = !I->use_empty();
unsigned Reg = ArgLive ? getReg(*I) : 0;
int FI; // Frame object index
// llvm.va_start.
if (Fn.getFunctionType()->isVarArg())
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
+
+ // Finally, inform the compiler what our live-outs will be, aka, what we will
+ // be returning in registers.
+ if (Fn.getReturnType() != Type::VoidTy)
+ switch (getClassB(Fn.getReturnType())) {
+ default: assert(0 && "Unknown type!");
+ case cByte:
+ case cShort:
+ case cInt:
+ F->addLiveOut(X86::EAX);
+ break;
+ case cLong:
+ F->addLiveOut(X86::EAX);
+ F->addLiveOut(X86::EDX);
+ break;
+ case cFP:
+ F->addLiveOut(X86::ST0);
+ break;
+ }
}
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86ISel::EmitSpecialCodeForMain() {
+ // Switch the FPU to 64-bit precision mode for better compatibility and speed.
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
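+ // The x87 control word can only be read or written through memory, so
+ // spill it to a stack slot, patch the copy there, and reload it below.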
+ addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
+
+ // Set the high part to be 64-bit precision.
+ addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
+ CWFrameIdx, 1).addImm(2);
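+ // Writing 2 to byte 1 (the high byte of the control word) sets the
+ // precision-control field (bits 8-9) to 10b: 53-bit significands, the
+ // precision of a 64-bit IEEE double, instead of the 80-bit extended
+ // default.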
+
+ // Reload the modified control word now.
+ addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
+}
/// SelectPHINodes - Insert machine code to generate phis. This is tricky
/// because we have to generate our sources into the source basic blocks, not
// predecessor. Recycle it.
ValReg = EntryIt->second;
- } else {
+ } else {
// Get the incoming value into a virtual register.
//
Value *Val = PN->getIncomingValue(i);
// might be arbitrarily complex if it is a constant expression),
// just insert the computation at the top of the basic block.
MachineBasicBlock::iterator PI = PredMBB->begin();
-
+
// Skip over any PHI nodes though!
while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI)
++PI;
-
+
ValReg = getReg(Val, PredMBB, PI);
}
MachineOperand& MO = I->getOperand(i);
if (MO.isRegister() && MO.getReg()) {
unsigned Reg = MO.getReg();
- if (MRegisterInfo::isVirtualRegister(Reg))
- if (RegMap.getRegClass(Reg)->getSize() == 10)
+ if (MRegisterInfo::isVirtualRegister(Reg)) {
+ unsigned RegSize = RegMap.getRegClass(Reg)->getSize();
+ if (RegSize == 10 || RegSize == 8)
goto UsesFPReg;
+ }
}
}
// If we haven't found an FP register use or def in this basic block, check
MachineBasicBlock *SBB = *SI;
for (MachineBasicBlock::iterator I = SBB->begin();
I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
- if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10)
+ const TargetRegisterClass *RC =
+ RegMap.getRegClass(I->getOperand(0).getReg());
+ if (RC->getSize() == 10 || RC->getSize() == 8)
goto UsesFPReg;
}
}
UsesFPReg:
// Okay, this block uses an FP register. If the block has successors (ie,
// it's not an unwind/return), insert the FP_REG_KILL instruction.
- if (BB->succ_size () && RequiresFPRegKill(BB)) {
+ if (BB->succ_size() && RequiresFPRegKill(BB)) {
BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
++NumFPKill;
}
// canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold
// it into the conditional branch or select instruction which is the only user
// of the cc instruction. This is the case if the conditional branch is the
-// only user of the setcc. We also don't handle long arguments below, so we
+// only user of the setcc. We also don't handle long arguments below, so we
// reject them here as well.
//
static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
if (SetCondInst *SCI = dyn_cast<SetCondInst>(V))
if (SCI->hasOneUse()) {
Instruction *User = cast<Instruction>(SCI->use_back());
- if (isa<BranchInst>(User) || (isa<SelectInst>(User) &&
- User->getOperand(0) == V))
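+ // Long comparisons other than seteq/setne leave their result in a
+ // byte register (see EmitComparison) rather than in EFLAGS, so they
+ // cannot be folded into a branch or select.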
+ if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
+ (getClassB(SCI->getOperand(0)->getType()) != cLong ||
+ SCI->getOpcode() == Instruction::SetEQ ||
+ SCI->getOpcode() == Instruction::SetNE) &&
+ (isa<BranchInst>(User) || User->getOperand(0) == V))
return SCI;
}
return 0;
static const unsigned TESTTab[] = {
X86::TEST8ri, X86::TEST16ri, X86::TEST32ri
};
-
+
// Emit test X, i
unsigned LHS = getReg(Op0I->getOperand(0), MBB, IP);
unsigned Imm =
cast<ConstantInt>(Op0I->getOperand(1))->getRawValue();
BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(LHS).addImm(Imm);
-
+
if (OpNum == 2) return 6; // Map jl -> js
if (OpNum == 3) return 7; // Map jg -> jns
return OpNum;
BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
return OpNum;
} else {
- // To compare A op B, compute A-B, and check the result flag.
- unsigned LowTmp = makeAnotherReg(Type::IntTy);
- unsigned HiTmp = makeAnotherReg(Type::IntTy);
- BuildMI(*MBB, IP, X86::SUB32ri, 2, LowTmp).addReg(Op0r).addImm(LowCst);
- BuildMI(*MBB, IP, X86::SBB32ri, 2, HiTmp).addReg(Op0r+1).addImm(HiCst);
+ // Emit a sequence of code which compares the high and low parts once
+ // each, then uses a conditional move to handle the overflow case. For
+ // example, a setlt for long would generate code like this:
+ //
+ // AL = lo(op1) < lo(op2) // Always unsigned comparison
+ // BL = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? BL : AL;
+ //
+
+ // FIXME: This would be much better if we had hierarchical register
+ // classes! Until then, hardcode registers so that we can deal with
+ // their aliases (because we don't have conditional byte moves).
+ //
+ BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+ BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
+ BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+ BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
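+ // Only AL and BL carry meaningful data here; the IMPLICIT_DEFs mark
+ // AH/BH as defined so that the 16-bit CMOVE below may legally read the
+ // full AX and BX registers.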
+ BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+ .addReg(X86::AX);
+ // NOTE: visitSetCondInst knows that the value is dumped into the BL
+ // register at this point for long values...
return OpNum;
}
}
BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
break; // Allow the sete or setne to be generated from flags set by OR
} else {
- // To compare A op B, compute A-B, and check the result flag.
- unsigned LowTmp = makeAnotherReg(Type::IntTy);
- unsigned HiTmp = makeAnotherReg(Type::IntTy);
- BuildMI(*MBB, IP, X86::SUB32rr, 2, LowTmp).addReg(Op0r).addReg(Op1r);
- BuildMI(*MBB, IP, X86::SBB32rr, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1);
- return OpNum;
-
// Emit a sequence of code which compares the high and low parts once
// each, then uses a conditional move to handle the overflow case. For
// example, a setlt for long would generate code like this:
}
/// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
-/// register, then move it to wherever the result should be.
+/// register, then move it to wherever the result should be.
///
void X86ISel::visitSetCondInst(SetCondInst &I) {
if (canFoldSetCCIntoBranchOrSelect(&I))
unsigned CompClass = getClassB(CompTy);
bool isSigned = CompTy->isSigned() && CompClass != cFP;
- BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
+ if (CompClass != cLong || OpNum < 2) {
+ // Handle normal comparisons with a setcc instruction...
+ BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
+ } else {
+ // Handle long comparisons by copying the value which is already in BL into
+ // the register we want...
+ BuildMI(*MBB, IP, X86::MOV8rr, 1, TargetReg).addReg(X86::BL);
+ }
}
void X86ISel::visitSelectInst(SelectInst &SI) {
emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(),
SI.getFalseValue(), DestReg);
}
-
+
/// emitSelect - Common code shared between visitSelectInst and the constant
/// expression support.
void X86ISel::emitSelectOperation(MachineBasicBlock *MBB,
Value *Cond, Value *TrueVal, Value *FalseVal,
unsigned DestReg) {
unsigned SelectClass = getClassB(TrueVal->getType());
-
+
// We don't support 8-bit conditional moves. If we have incoming constants,
// transform them into 16-bit constants to avoid having a run-time conversion.
if (SelectClass == cByte) {
unsigned Opcode;
if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) {
// We successfully folded the setcc into the select instruction.
-
+
unsigned OpNum = getSetCCNumber(SCI->getOpcode());
OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), MBB,
IP);
const Type *CompTy = SCI->getOperand(0)->getType();
bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
-
+
// LLVM -> X86 signed X86 unsigned
// ----- ---------- ------------
// seteq -> cmovNE cmovNE
// ----
// cmovNS // Used by comparison with 0 optimization
// cmovS
-
+
switch (SelectClass) {
default: assert(0 && "Unknown value class!");
case cFP: {
// Long comparisons end up in the BL register.
CondReg = X86::BL;
}
-
+
BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
Opcode = X86::FCMOVE;
}
case cShort:
case cInt:
promote32(X86::EAX, ValueRecord(RetVal));
- // Declare that EAX is live on exit
- BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP);
break;
case cFP: { // Floats & Doubles: Return in ST(0)
unsigned RetReg = getReg(RetVal);
BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg);
- // Declare that top-of-stack is live on exit
- BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP);
break;
}
case cLong: {
unsigned RetReg = getReg(RetVal);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg);
BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1);
- // Declare that EAX & EDX are live on exit
- BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX)
- .addReg(X86::ESP);
break;
}
default:
BuildMI(BB, X86::JNE, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
} else {
BuildMI(BB, X86::JE, 1).addMBB(MBBMap[BI.getSuccessor(1)]);
-
+
if (BI.getSuccessor(0) != NextBB)
BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
}
const Type *CompTy = SCI->getOperand(0)->getType();
bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
-
+
// LLVM -> X86 signed X86 unsigned
// ----- ---------- ------------
{ X86::JE, X86::JNE, X86::JL, X86::JGE, X86::JG, X86::JLE,
X86::JS, X86::JNS },
};
-
+
if (BI.getSuccessor(0) != NextBB) {
BuildMI(BB, OpcodeTab[isSigned][OpNum], 1)
.addMBB(MBBMap[BI.getSuccessor(0)]);
} else if (Args[i].Val && isa<ConstantPointerNull>(Args[i].Val)) {
addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
X86::ESP, ArgOffset).addImm(0);
+ } else if (Args[i].Val && isa<GlobalValue>(Args[i].Val)) {
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
+ .addGlobalAddress(cast<GlobalValue>(Args[i].Val));
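+ // Like the null-pointer case above: a global's address is a link-time
+ // constant, so it can be stored into the argument slot as an immediate.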
} else {
ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
}
ArgOffset += 4; // 8 byte entry, not 4.
break;
-
+
case cFP:
- ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
- if (Args[i].Ty == Type::FloatTy) {
- addRegOffset(BuildMI(BB, X86::FST32m, 5),
- X86::ESP, ArgOffset).addReg(ArgReg);
+ if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Args[i].Val)) {
+ // Store constant FP values with integer instructions to avoid having
+ // to load the constants from the constant pool then do a store.
+ if (CFP->getType() == Type::FloatTy) {
+ union {
+ unsigned I;
+ float F;
+ } V;
+ V.F = CFP->getValue();
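+ // V.I now holds the IEEE-754 bit pattern of the float, which a plain
+ // 32-bit immediate move can store directly.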
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
+ X86::ESP, ArgOffset).addImm(V.I);
+ } else {
+ union {
+ uint64_t I;
+ double F;
+ } V;
+ V.F = CFP->getValue();
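+ // Store the double as two 32-bit immediates: low word at ArgOffset,
+ // high word at ArgOffset+4 (little-endian).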
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
+ X86::ESP, ArgOffset).addImm((unsigned)V.I);
+ addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
+ X86::ESP, ArgOffset+4).addImm(unsigned(V.I >> 32));
+ ArgOffset += 4; // 8 byte entry, not 4.
+ }
} else {
- assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
- addRegOffset(BuildMI(BB, X86::FST64m, 5),
- X86::ESP, ArgOffset).addReg(ArgReg);
- ArgOffset += 4; // 8 byte entry, not 4.
+ ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
+ if (Args[i].Ty == Type::FloatTy) {
+ addRegOffset(BuildMI(BB, X86::FST32m, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ } else {
+ assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
+ addRegOffset(BuildMI(BB, X86::FST64m, 5),
+ X86::ESP, ArgOffset).addReg(ArgReg);
+ ArgOffset += 4; // 8 byte entry, not 4.
+ }
}
break;
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) {
visitIntrinsicCall(ID, CI); // Special intrinsics are not handled here
return;
+ } else if (F->getName() == "fabs" || F->getName() == "fabsf") {
+ if (CI.getNumOperands() == 2 && // Basic sanity checks.
+ CI.getOperand(1)->getType()->isFloatingPoint() &&
+ CI.getType() == CI.getOperand(1)->getType()) {
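+ // Lower fabs/fabsf calls directly to the x87 FABS instruction, which
+ // simply clears the sign bit of its operand.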
+ unsigned op1Reg = getReg(CI.getOperand(1));
+ unsigned DestReg = getReg(CI);
+ BuildMI(BB, X86::FABS, 1, DestReg).addReg(op1Reg);
+ return;
+ }
}
// Emit a CALL instruction with PC-relative displacement.
unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0;
doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args);
-}
+}
/// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
/// function, lowering any calls to unknown intrinsic functions into the
case Intrinsic::readio: {
// On X86, memory operations are in-order. Lower this intrinsic
// into a volatile load.
- Instruction *Before = CI->getPrev();
LoadInst * LI = new LoadInst(CI->getOperand(1), "", true, CI);
CI->replaceAllUsesWith(LI);
BB->getInstList().erase(CI);
case Intrinsic::writeio: {
// On X86, memory operations are in-order. Lower this intrinsic
// into a volatile store.
- Instruction *Before = CI->getPrev();
StoreInst *LI = new StoreInst(CI->getOperand(1),
CI->getOperand(2), true, CI);
CI->replaceAllUsesWith(LI);
case Intrinsic::frameaddress:
TmpReg1 = getReg(CI);
if (cast<Constant>(CI.getOperand(1))->isNullValue()) {
+ if (ReturnAddressIndex == 0) {
+ // Set up a frame object for the return address.
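+ // The return address pushed by CALL sits in the 4 bytes just below the
+ // frame, so a fixed object at offset -4 maps onto that slot.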
+ ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);
+ }
+
if (ID == Intrinsic::returnaddress) {
// Just load the return address
addFrameReference(BuildMI(BB, X86::MOV32rm, 4, TmpReg1),
BuildMI(BB, Opc[Class], 0);
return;
}
-
+
default: assert(0 && "Error: unknown intrinsics should have been lowered!");
}
}
// Special case: op Reg, load [mem]
if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1) && Class != cLong &&
- Op0->hasOneUse() &&
+ Op0->hasOneUse() &&
isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B))
if (!B.swapOperands())
std::swap(Op0, Op1); // Make sure any loads are in the RHS.
// Arithmetic operators
{ X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD
{ X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB
-
+
// Bitwise operators
{ X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND
{ X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR
} else {
X86AddressMode AM;
getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), AM);
-
+
addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), AM);
}
return;
// If this is a floating point subtract, check to see if we can fold the first
// operand in.
if (Class == cFP && OperatorClass == 1 &&
- isa<LoadInst>(Op0) &&
+ isa<LoadInst>(Op0) &&
isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) {
const Type *Ty = Op0->getType();
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
} else {
X86AddressMode AM;
getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), AM);
-
+
addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), AM);
}
return;
DestReg).addReg(Op0r), CPI);
return;
}
-
+
// Special case: R1 = op <const fp>, R2
if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) {
{ X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float
{ X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double
};
-
+
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
unsigned Op1r = getReg(Op1, BB, IP);
void X86ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1,
- unsigned OperatorClass,
+ unsigned OperatorClass,
unsigned DestReg) {
unsigned Class = getClassB(Op0->getType());
if (CI->isNullValue()) {
unsigned op1Reg = getReg(Op1, MBB, IP);
BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg);
-
+
if (Class == cLong) {
// We just emitted: Dl = neg Sl
// Now emit : T = addc Sh, 0
// sub C, X -> tmp = neg X; DestReg = add tmp, C. This is better
// than copying C into a temporary register, because of register
// pressure (tmp and destreg can share a register).
- static unsigned const ADDRITab[] = {
+ static unsigned const ADDRITab[] = {
X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri
};
unsigned op1Reg = getReg(Op1, MBB, IP);
BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r);
return;
}
-
+
static const unsigned OpcodeTab[][5] = {
// Arithmetic operators
{ X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri }, // ADD
{ X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, X86::SUB32ri }, // SUB
-
+
// Bitwise operators
{ X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, X86::AND32ri }, // AND
{ X86:: OR8ri, X86:: OR16ri, X86:: OR32ri, 0, X86:: OR32ri }, // OR
{ X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, X86::XOR32ri }, // XOR
};
-
+
unsigned Opcode = OpcodeTab[OperatorClass][Class];
unsigned Op1l = cast<ConstantInt>(Op1C)->getRawValue();
BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
return;
}
-
+
// If this is a long value and the high or low bits have a special
// property, emit some special cases.
unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
-
+
// If the constant is zero in the low 32-bits, just copy the low part
// across and apply the normal 32-bit operation to the high parts. There
// will be no carry or borrow into the top.
.addReg(Op0r+1).addImm(Op1h);
return;
}
-
+
// If this is a logical operation and the top 32-bits are zero, just
// operate on the lower 32.
if (Op1h == 0 && OperatorClass > 1) {
BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
return;
}
-
+
// TODO: We could handle lots of other special cases here, such as AND'ing
// with 0xFFFFFFFF00000000 -> noop, etc.
-
+
// Otherwise, code generate the full operation with a constant.
static const unsigned TopTab[] = {
X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
};
-
+
BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
.addReg(Op0r+1).addImm(Op1h);
// Arithmetic operators
{ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr }, // ADD
{ X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr }, // SUB
-
+
// Bitwise operators
{ X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr }, // AND
{ X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0, X86:: OR32rr }, // OR
{ X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, X86::XOR32rr }, // XOR
};
-
+
unsigned Opcode = OpcodeTab[OperatorClass][Class];
unsigned Op0r = getReg(Op0, MBB, IP);
unsigned Op1r = getReg(Op1, MBB, IP);
BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
-
+
if (Class == cLong) { // Handle the upper 32 bits of long values...
static const unsigned TopTab[] = {
X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
return;
}
}
-
+
if (Class == cShort) {
BuildMI(*MBB, IP, X86::IMUL16rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
return;
// Most general case, emit a normal multiply...
TmpReg = makeAnotherReg(DestTy);
BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
-
+
// Emit a MUL to multiply the register holding the index by
// elementSize, putting the result in OffsetReg.
doMultiply(MBB, IP, DestReg, DestTy, op0Reg, TmpReg);
const Type *Ty = Op0->getType();
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
-
+
unsigned Op0r = getReg(Op0);
if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
unsigned FI = getFixedSizedAllocaFI(AI);
} else {
X86AddressMode AM;
getAddressingMode(LI->getOperand(0), AM);
-
+
addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM);
}
return;
emitMultiply(BB, IP, Op0, Op1, ResultReg);
}
-void X86ISel::emitMultiply(MachineBasicBlock *MBB,
+void X86ISel::emitMultiply(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
Value *Op0, Value *Op1, unsigned DestReg) {
MachineBasicBlock &BB = *MBB;
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
unsigned CLow = CI->getRawValue();
unsigned CHi = CI->getRawValue() >> 32;
-
+
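// When the low word of the constant is zero, the product reduces to
// (Op0Lo * CHi) << 32; the Op0Hi term falls entirely above bit 63.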
if (CLow == 0) {
// If the low part of the constant is all zeros, things are simple.
BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi);
return;
}
-
+
// Multiply the two low parts... capturing carry into EDX
unsigned OverflowReg = 0;
if (CLow == 1) {
BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
-
+
BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, IP, X86::MOV32rr, 1,
OverflowReg).addReg(X86::EDX); // AL*BL >> 32
}
-
+
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
-
+
unsigned AHBLplusOverflowReg;
if (OverflowReg) {
AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
} else {
AHBLplusOverflowReg = AHBLReg;
}
-
+
if (CHi == 0) {
BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
} else {
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi);
-
+
BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
}
// Multiply the two low parts... capturing carry into EDX
BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
-
+
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, IP, X86::MOV32rr, 1,
OverflowReg).addReg(X86::EDX); // AL*BL >> 32
-
+
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(BB, IP, X86::IMUL32rr, 2,
AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
-
+
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, IP, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
-
+
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(BB, IP, X86::IMUL32rr, 2,
ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
-
+
BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
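// The AH*BH product lies entirely above bit 63, so three 32x32
// multiplies suffice for the low 64 bits of the result.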
}
const Type *Ty = Op0->getType();
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
-
+
unsigned Op0r = getReg(Op0);
if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
unsigned FI = getFixedSizedAllocaFI(AI);
} else {
X86AddressMode AM;
getAddressingMode(LI->getOperand(0), AM);
-
+
addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM);
}
return;
const Type *Ty = Op0->getType();
assert((Ty == Type::FloatTy || Ty == Type::DoubleTy) && "Unknown FP type!");
unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m;
-
+
unsigned Op1r = getReg(Op1);
if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
unsigned FI = getFixedSizedAllocaFI(AI);
unsigned TmpReg1 = makeAnotherReg(Op0->getType());
BuildMI(*BB, IP, ANDOpcode[Class], 2, TmpReg1).addReg(Op0Reg).addImm(1);
-
+
unsigned TmpReg2 = makeAnotherReg(Op0->getType());
BuildMI(*BB, IP, XOROpcode[Class], 2,
TmpReg2).addReg(TmpReg1).addReg(TmpReg0);
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
+ static const unsigned SExOpcode[]={ X86::CBW , X86::CWD , X86::CDQ };
static const unsigned DivOpcode[][4] = {
{ X86::DIV8r , X86::DIV16r , X86::DIV32r , 0 }, // Unsigned division
BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
if (Ty->isSigned()) {
- // Emit a sign extension instruction...
- unsigned ShiftResult = makeAnotherReg(Op0->getType());
- BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
- BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
+ // Emit a sign extension instruction.
+ BuildMI(*BB, IP, SExOpcode[Class], 0);
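+ // CBW, CWD, and CDQ sign-extend AL into AX, AX into DX:AX, and EAX into
+ // EDX:EAX in a single instruction each, replacing the old
+ // shift-by-31-and-copy sequence.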
// Emit the appropriate divide or remainder instruction...
BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg);
// Figure out which register we want to pick the result out of...
unsigned DestReg = isDiv ? Reg : ExtReg;
-
+
// Put the result into the destination register...
BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg);
}
/// Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
/// constant.
-void X86ISel::doSHLDConst(MachineBasicBlock *MBB,
+void X86ISel::doSHLDConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
unsigned Amt) {
// NOTE: It is always cheaper on the P4 to emit SHLD as two shifts and an OR
// than it is to emit a real SHLD.
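// The operation computed here is DestReg = (Op0 << Amt) | (Op1 >> (32-Amt)),
// a funnel shift that pulls Op1's high bits into DestReg's low end.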
- BuildMI(*MBB, IP, X86::SHLD32rri8, 3,
+ BuildMI(*MBB, IP, X86::SHLD32rri8, 3,
DestReg).addReg(Op0Reg).addReg(Op1Reg).addImm(Amt);
}
}
/// constant expression support.
void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
- Value *Op, Value *ShiftAmount,
- bool isLeftShift, const Type *ResultTy,
+ Value *Op, Value *ShiftAmount,
+ bool isLeftShift, const Type *ResultTy,
unsigned DestReg) {
unsigned SrcReg = getReg (Op, MBB, IP);
bool isSigned = ResultTy->isSigned ();
} else {
BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
DestReg).addReg(SrcReg+1).addImm(Amount);
- BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
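+ // An arithmetic shift must fill the high word with copies of the sign
+ // bit (SrcHi >> 31); only a logical shift may fill it with zero.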
+ if (isSigned)
+ BuildMI(*MBB, IP, X86::SAR32ri, 2,
+ DestReg+1).addReg(SrcReg+1).addImm(31);
+ else
+ BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
}
}
} else {
BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
// DestHi = (>32) ? TmpReg3 : TmpReg2;
- BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg+1).addReg(TmpReg2).addReg(TmpReg3);
// DestLo = (>32) ? TmpReg : TmpReg3;
BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
// DestLo = (>32) ? TmpReg3 : TmpReg2;
- BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg).addReg(TmpReg2).addReg(TmpReg3);
// DestHi = (>32) ? TmpReg : TmpReg3;
- BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
+ BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
DestReg+1).addReg(TmpReg3).addReg(TmpReg);
}
}
bool Swapped = false;
if (!isa<LoadInst>(User->getOperand(1)))
Swapped = !cast<BinaryOperator>(User)->swapOperands();
-
+
// Okay, now that everything is set up, if this load is used by the second
// operand, and if there are no instructions that invalidate the load
// before the binary operator, eliminate the load.
return; // Eliminate the load!
// If we swapped the operands to the instruction, but couldn't fold the
- // load anyway, swap them back. We don't want to break add X, int
+ // load anyway, swap them back. We don't want to break add X, int
// folding.
if (Swapped) cast<BinaryOperator>(User)->swapOperands();
}
} else {
X86AddressMode AM;
getAddressingMode(I.getOperand(0), AM);
-
+
if (Class == cLong) {
addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg), AM);
AM.Disp += 4;
}
} else if (isa<ConstantPointerNull>(I.getOperand(0))) {
addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(0);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(0))) {
+ addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addGlobalAddress(GV);
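+ // A global's address is a link-time constant, so store it with a 32-bit
+ // immediate move rather than materializing it in a register first.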
} else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
addFullAddress(BuildMI(BB, X86::MOV8mi, 5), AM).addImm(CB->getValue());
} else if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) {
addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(
unsigned(V.I >> 32));
}
-
+
} else if (Class == cLong) {
unsigned ValReg = getReg(I.getOperand(0));
addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg);
// Noop casts are not emitted: getReg will return the source operand as the
// register to use for any uses of the noop cast.
if (DestClass == SrcClass) {
- // The only detail in this plan is that casts from double -> float are
+ // The only detail in this plan is that casts from double -> float are
// truncating operations that we have to codegen through memory (despite
// the fact that the source/dest registers are the same class).
if (CI.getType() != Type::FloatTy || Op->getType() != Type::DoubleTy)
if (!isa<GetElementPtrInst>(*I)) {
AllUsesAreGEPs = false;
break;
- }
+ }
// No need to codegen this cast if all users are getelementptr instrs...
if (AllUsesAreGEPs) return;
// reading it back.
unsigned FltAlign = TM.getTargetData().getFloatAlignment();
int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
- addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5), FrameIdx).addReg(SrcReg);
+ addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5),
+ FrameIdx).addReg(SrcReg);
addFrameReference(BuildMI(*BB, IP, X86::FLD32m, 5, DestReg), FrameIdx);
}
} else if (SrcClass == cLong) {
{ X86::MOVSX16rr8, X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOV32rr }, // s
{ X86::MOVZX16rr8, X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOV32rr } // u
};
-
+
bool isUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy;
BuildMI(*BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
DestReg).addReg(SrcReg);
BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
return;
}
-
+
// Handle cast of LARGER int to SMALLER int using a move to EAX followed by a
// move out of AX or AL.
if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt
default: // No promotion needed...
break;
}
-
+
if (PromoteType) {
unsigned TmpReg = makeAnotherReg(PromoteType);
BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg);
// Compute whether the sign bit is set by shifting the reg right 31 bits.
unsigned IsNeg = makeAnotherReg(Type::IntTy);
- BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
+ BuildMI(*BB, IP, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
// Create a CP value that has the offset in one word and 0 in the other.
static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
0x4f80000000000000ULL);
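// Little-endian, the pool entry reads as +0.0f at offset 0 and 0x4f800000
// (2^32 as a float) at offset 4; indexing by IsNeg adds 2^32 exactly when
// the source was treated as a negative signed value.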
unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
- BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
+ BuildMI(*BB, IP, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
.addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
} else if (SrcTy == Type::ULongTy) {
MachineConstantPool *CP = F->getConstantPool();
unsigned Zero = makeAnotherReg(Type::IntTy);
Constant *Null = Constant::getNullValue(Type::UIntTy);
- addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero),
+ addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero),
CP->getConstantPoolIndex(Null));
unsigned Offset = makeAnotherReg(Type::IntTy);
Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
-
+
addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Offset),
CP->getConstantPoolIndex(OffsetCst));
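// 0x5f800000 is 2^64 as an IEEE float; presumably Addr below selects
// between the null (0.0f) entry and this one so a single FADD32m can
// correct values whose sign bit was set.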
unsigned Addr = makeAnotherReg(Type::IntTy);
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
-
+
// Restore the memory image of control word to original value
addFrameReference(BuildMI(*BB, IP, X86::MOV8mr, 5),
CWFrameIdx, 1).addReg(HighPartOfCW);
///
/// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
///
-void X86ISel::getGEPIndex(MachineBasicBlock *MBB,
+void X86ISel::getGEPIndex(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
std::vector<Value*> &GEPOps,
std::vector<const Type*> &GEPTypes,
// It's a struct access. CUI is the index into the structure,
// which names the field. This index must have unsigned type.
const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
-
+
// Use the TargetData structure to pick out what the layout of the
// structure is in memory. Since the structure index must be constant, we
// can get its value and use it to find the right byte offset from the
// If the index reg is already taken, we can't handle this index.
if (AM.IndexReg) return;
- // If this is a size that we can handle, then add the index as
+ // If this is a size that we can handle, then add the index as
switch (TypeSize) {
case 1: case 2: case 4: case 8:
// These are all acceptable scales on X86.
GEPOps.resize(IdxEnd-IdxBegin+1);
GEPOps[0] = Src;
std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
-
+
std::vector<const Type*>
GEPTypes(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
gep_type_end(Src->getType(), IdxBegin, IdxEnd));
GEPOps.resize(IdxEnd-IdxBegin+1);
GEPOps[0] = Src;
std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
-
+
std::vector<const Type*> GEPTypes;
GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
gep_type_end(Src->getType(), IdxBegin, IdxEnd));
unsigned OldSize = GEPOps.size();
X86AddressMode AM;
getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM);
-
+
if (GEPOps.size() != OldSize) {
// getGEPIndex consumed some of the input. Build an LEA instruction here.
unsigned NextTarget = 0;
// statically stack allocate the space, so we don't need to do anything here.
//
if (dyn_castFixedAlloca(&I)) return;
-
+
// Find the data size of the alloca inst's getAllocatedType.
const Type *Ty = I.getAllocatedType();
unsigned TySize = TM.getTargetData().getTypeSize(Ty);
// constant by the variable amount.
unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy);
unsigned SrcReg1 = getReg(I.getArraySize());
-
+
// TotalSizeReg = mul <numelements>, <TypeSize>
MachineBasicBlock::iterator MBBI = BB->end();
doMultiplyConst(BB, MBBI, TotalSizeReg, Type::UIntTy, SrcReg1, TySize);
// AlignedSize = and <AddedSize>, ~15
unsigned AlignedSize = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::AND32ri, 2, AlignedSize).addReg(AddedSizeReg).addImm(~15);
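// Assuming AddedSizeReg holds the size plus 15 (computed above), masking
// with ~15 rounds the allocation up to a multiple of 16, keeping ESP
// 16-byte aligned.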
-
+
// Subtract size from stack pointer, thereby allocating some space.
BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
1).addExternalSymbol("free", true);
doCall(ValueRecord(0, Type::VoidTy), TheCall, Args);
}
-
+
/// createX86SimpleInstructionSelector - This pass converts an LLVM function
/// into a machine code representation in a very simple peephole fashion. The
/// generated code sucks but the implementation is nice and simple.