#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
public:
explicit X86FastISel(MachineFunction &mf,
MachineModuleInfo *mmi,
+ DwarfWriter *dw,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
, SmallSet<Instruction*, 8> &cil
#endif
)
- : FastISel(mf, mmi, vm, bm, am
+ : FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
, cil
#endif
bool X86SelectFPExt(Instruction *I);
bool X86SelectFPTrunc(Instruction *I);
+ bool X86SelectExtractValue(Instruction *I);
+
+ bool X86VisitIntrinsicCall(CallInst &I, unsigned Intrinsic);
bool X86SelectCall(Instruction *I);
CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);
return X86FastEmitStore(VT, ValReg, AM);
}
-
-
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return X86SelectAddress(U->getOperand(0), AM, isCall);
+ break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return X86SelectAddress(U->getOperand(0), AM, isCall);
+ break;
case Instruction::Alloca: {
if (isCall) break;
unsigned IndexReg = AM.IndexReg;
unsigned Scale = AM.Scale;
gep_type_iterator GTI = gep_type_begin(U);
- // Look at all but the last index. Constants can be folded,
- // and one dynamic index can be handled, if the scale is supported.
+ // Iterate through the indices, folding what we can. Constant offsets are
+ // folded into Disp, and one dynamic index with a supported scale is allowed.
for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
Value *Op = *i;
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
Disp += SL->getElementOffset(Idx);
} else {
- uint64_t S = TD.getABITypeSize(GTI.getIndexedType());
+ uint64_t S = TD.getTypePaddedSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
Disp += CI->getSExtValue() * S;
(S == 1 || S == 2 || S == 4 || S == 8)) {
// Scaled-index addressing.
Scale = S;
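+ // Note: getRegForGEPIndex, rather than getRegForValue, so the index is
+ // first sign-extended or truncated to pointer width, as GEP indices are
+ // signed values of arbitrary integer type.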
- IndexReg = getRegForValue(Op);
+ IndexReg = getRegForGEPIndex(Op);
if (IndexReg == 0)
return false;
} else
MBB->addSuccessor(TrueMBB);
return true;
}
+ } else if (ExtractValueInst *EI =
+ dyn_cast<ExtractValueInst>(BI->getCondition())) {
+ // Check to see if the branch instruction is from an "arithmetic with
+ // overflow" intrinsic. The main way these intrinsics are used is:
+ //
+ // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ // %sum = extractvalue { i32, i1 } %t, 0
+ // %obit = extractvalue { i32, i1 } %t, 1
+ // br i1 %obit, label %overflow, label %normal
+ //
+ // The %sum and %obit are converted into an ADD and a SETO/SETB before
+ // reaching the branch. Therefore, we search backwards through the MBB
+ // looking for the SETO/SETB instruction. If an instruction modifies the
+ // EFLAGS register before we reach the SETO/SETB instruction, then we can't
+ // convert the branch into a JO/JB instruction.
+
+ Value *Agg = EI->getAggregateOperand();
+
+ if (CallInst *CI = dyn_cast<CallInst>(Agg)) {
+ Function *F = CI->getCalledFunction();
+
+ if (F && F->isDeclaration()) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ const MachineInstr *SetMI = 0;
+ unsigned Reg = lookUpRegForValue(EI);
+
+ for (MachineBasicBlock::const_reverse_iterator
+ RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
+ const MachineInstr &MI = *RI;
+
+ if (MI.modifiesRegister(Reg)) {
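+ // This instruction defines the register we are tracking; it is either
+ // a register copy (follow its source) or, ideally, the SETO/SETB itself.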
+ unsigned Src, Dst, SrcSR, DstSR;
+
+ if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
+ Reg = Src;
+ continue;
+ }
+
+ SetMI = &MI;
+ break;
+ }
+
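+ // Otherwise, make sure this instruction cannot clobber EFLAGS before we
+ // reach the SETO/SETB.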
+ const TargetInstrDesc &TID = MI.getDesc();
+ const unsigned *ImpDefs = TID.getImplicitDefs();
+
+ if (TID.hasUnmodeledSideEffects()) break;
+
+ bool ModifiesEFlags = false;
+
+ if (ImpDefs) {
+ for (unsigned u = 0; ImpDefs[u]; ++u)
+ if (ImpDefs[u] == X86::EFLAGS) {
+ ModifiesEFlags = true;
+ break;
+ }
+ }
+
+ if (ModifiesEFlags) break;
+ }
+
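+ // If the defining instruction is a SETO/SETB, fold the branch directly
+ // into a JO/JB on the still-live EFLAGS.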
+ if (SetMI) {
+ unsigned OpCode = SetMI->getOpcode();
+
+ if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
+ BuildMI(MBB, TII.get((OpCode == X86::SETOr) ?
+ X86::JO : X86::JB)).addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
}
// Otherwise do a clumsy setcc and re-test it.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
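+ // x86 shift-by-immediate instructions encode the amount in an 8-bit
+ // field, so mask the constant down before emitting it.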
unsigned ResultReg = createResultReg(RC);
BuildMI(MBB, TII.get(OpImm),
- ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue());
+ ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
UpdateValueMap(I, ResultReg);
return true;
}
BuildMI(MBB, TII.get(CopyOpc), CopyReg).addReg(InputReg);
// Then issue an extract_subreg.
- unsigned ResultReg = FastEmitInst_extractsubreg(CopyReg, X86::SUBREG_8BIT);
+ unsigned ResultReg = FastEmitInst_extractsubreg(DstVT.getSimpleVT(),
+ CopyReg, X86::SUBREG_8BIT);
if (!ResultReg)
return false;
return true;
}
+bool X86FastISel::X86SelectExtractValue(Instruction *I) {
+ ExtractValueInst *EI = cast<ExtractValueInst>(I);
+ Value *Agg = EI->getAggregateOperand();
+
+ if (CallInst *CI = dyn_cast<CallInst>(Agg)) {
+ Function *F = CI->getCalledFunction();
+
+ if (F && F->isDeclaration()) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ // Cheat a little. We know that the registers for "add" and "seto" are
+ // allocated sequentially. However, we only keep track of the register
+ // for "add" in the value map. Use extractvalue's index to get the
+ // correct register for "seto".
+ UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+ // FIXME: Handle more intrinsics.
+ switch (Intrinsic) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+ // by a seto/setc instruction. Later on, when the "extractvalue"
+ // instructions are encountered, we use the fact that two registers were
+ // created sequentially to get the correct registers for the "sum" and the
+ // "overflow bit".
+ MVT VT;
+ const Function *Callee = I.getCalledFunction();
+ const Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
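+ // The intrinsic returns a { iN, i1 } struct; legality is checked on the
+ // first element, which is the type of the arithmetic result.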
+
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ Value *Op1 = I.getOperand(1);
+ Value *Op2 = I.getOperand(2);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+
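+ // Only 32- and 64-bit adds are handled here; everything else falls back
+ // to the default instruction selector.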
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
+ UpdateValueMap(&I, ResultReg);
+
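+ // The overflow-bit register is created immediately after the sum's
+ // register; X86SelectExtractValue relies on the two being sequential.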
+ ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
+ BuildMI(MBB, TII.get((Intrinsic == Intrinsic::sadd_with_overflow) ?
+ X86::SETOr : X86::SETBr), ResultReg);
+ return true;
+ }
+ }
+}
+
bool X86FastISel::X86SelectCall(Instruction *I) {
CallInst *CI = cast<CallInst>(I);
Value *Callee = I->getOperand(0);
if (isa<InlineAsm>(Callee))
return false;
- // FIXME: Handle some intrinsics.
- if (Function *F = CI->getCalledFunction()) {
- if (F->isDeclaration() &&F->getIntrinsicID())
- return false;
- }
+ // Handle intrinsic calls.
+ if (Function *F = CI->getCalledFunction())
+ if (F->isDeclaration())
+ if (unsigned IID = F->getIntrinsicID())
+ return X86VisitIntrinsicCall(*CI, IID);
// Handle only C and fastcc calling conventions for now.
CallSite CS(CI);
case CCValAssign::SExt: {
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!");
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted; // Avoid unused-variable warnings in -Asserts builds.
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::ZExt: {
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!");
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a aext!");
+ assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
Arg, RC, RC);
- assert(Emitted && "Failed to emit a copy instruction!");
+ assert(Emitted && "Failed to emit a copy instruction!"); (void)Emitted;
RegArgs.push_back(VA.getLocReg());
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
- assert(Emitted && "Failed to emit a copy instruction!");
+ assert(Emitted && "Failed to emit a copy instruction!"); (void)Emitted;
}
// Issue the call.
unsigned ResultReg = createResultReg(DstRC);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
RVLocs[0].getLocReg(), DstRC, SrcRC);
- assert(Emitted && "Failed to emit a copy instruction!");
+ assert(Emitted && "Failed to emit a copy instruction!"); (void)Emitted;
if (CopyVT != RVLocs[0].getValVT()) {
// Round the F80 to the right size, which also moves it to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
return X86SelectFPExt(I);
case Instruction::FPTrunc:
return X86SelectFPTrunc(I);
+ case Instruction::ExtractValue:
+ return X86SelectExtractValue(I);
}
return false;
unsigned Align = TD.getPreferredTypeAlignmentShift(C->getType());
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = TD.getABITypeSize(C->getType());
+ Align = TD.getTypePaddedSize(C->getType());
Align = Log2_64(Align);
}
namespace llvm {
llvm::FastISel *X86::createFastISel(MachineFunction &mf,
MachineModuleInfo *mmi,
+ DwarfWriter *dw,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
, SmallSet<Instruction*, 8> &cil
#endif
) {
- return new X86FastISel(mf, mmi, vm, bm, am
+ return new X86FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
, cil
#endif