#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
cl::desc("Turn off experimental ARM fast-isel support"),
cl::init(false), cl::Hidden);
+extern cl::opt<bool> EnableARMLongCalls;
+
namespace {
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
class ARMFastISel : public FastISel {
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
ARMFunctionInfo *AFI;
// Convenience variables to avoid some queries.
- bool isThumb;
+ bool isThumb2;
LLVMContext *Context;
public:
TLI(*TM.getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
- isThumb = AFI->isThumbFunction();
+ isThumb2 = AFI->isThumbFunction();
Context = &funcInfo.Fn->getContext();
}
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm);
- virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm);
virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx);
bool SelectCall(const Instruction *I);
bool SelectSelect(const Instruction *I);
bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
- bool isTypeLegal(const Type *Ty, MVT &VT);
- bool isLoadTypeLegal(const Type *Ty, MVT &VT);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Base, int Offset);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Base, int Offset);
- bool ARMComputeRegOffset(const Value *Obj, unsigned &Base, int &Offset);
- void ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT);
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, EVT VT);
+ unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
// Call handling routines.
private:
// OptionalDef handling routines.
private:
+ bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags);
};
} // end anonymous namespace
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.hasOptionalDef())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
return true;
}
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return false;
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
MachineInstr *MI = &*MIB;
- // Do we use a predicate?
- if (TII.isPredicable(MI))
+ // Do we use a predicate? or...
+ // Are we NEON in ARM mode and have a predicate operand? If so, I know
+ // we're not predicable but add it anyways.
+ if (TII.isPredicable(MI) || isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
return ResultReg;
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
return ResultReg;
}
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
unsigned Op1, bool Op1IsKill,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
const TargetRegisterClass *RC,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
return ResultReg;
}
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm1).addImm(Imm2));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx) {
// This checks to see if we can use VFP3 instructions to materialize
// a constant, otherwise we have to go through the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ int Imm;
+ unsigned Opc;
+ if (is64bit) {
+ Imm = ARM_AM::getFP64Imm(Val);
+ Opc = ARM::FCONSTD;
+ } else {
+ Imm = ARM_AM::getFP32Imm(Val);
+ Opc = ARM::FCONSTS;
+ }
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
DestReg)
- .addFPImm(CFP));
+ .addImm(Imm));
return DestReg;
}
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
- // For now 32-bit only.
- if (VT != MVT::i32) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return false;
// If we can do this in a single instruction without a constant pool entry
// do so now.
const ConstantInt *CI = cast<ConstantInt>(C);
- if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
- unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+ EVT SrcVT = MVT::i32;
+ unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(SrcVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), DestReg)
- .addImm(CI->getSExtValue()));
- return DestReg;
+ TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue()));
+ return ImmReg;
}
+ // For now 32-bit only.
+ if (VT != MVT::i32)
+ return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
- if (isThumb)
+ if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg)
.addConstantPoolIndex(Idx));
else
- // The extra reg and immediate are for addrmode2.
+ // The extra immediate is for addrmode2.
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::LDRcp), DestReg)
.addConstantPoolIndex(Idx)
Reloc::Model RelocM = TM.getRelocationModel();
- // TODO: No external globals for now.
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
-
// TODO: Need more magic for ARM PIC.
- if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+ if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(GV->getType());
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
- unsigned Id = AFI->createConstPoolEntryUId();
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
- ARMCP::CPValue, PCAdj);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
// Load value.
MachineInstrBuilder MIB;
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb) {
+ if (isThumb2) {
unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx);
if (RelocM == Reloc::PIC_)
MIB.addImm(Id);
} else {
- // The extra reg and immediate are for addrmode2.
+ // The extra immediate is for addrmode2.
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
- .addReg(0).addImm(0);
+ .addImm(0);
}
AddOptionalDefs(MIB);
+
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb2)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ else
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
+ NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
+ }
+
return DestReg;
}
if (SI != FuncInfo.StaticAllocaMap.end()) {
TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
return 0;
}
-bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
// Only handle simple types.
return TLI.isTypeLegal(VT);
}
-bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT)) return true;
// If this is a type than can be sign or zero-extended to a basic operation
return false;
}
-// Computes the Reg+Offset to get to an object.
-bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Base,
- int &Offset) {
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
// Some boilerplate from the X86 FastISel.
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
- // Don't walk into other basic blocks; it's possible we haven't
- // visited them yet, so the instructions may not yet be assigned
- // virtual registers.
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
- return false;
- Opcode = I->getOpcode();
- U = I;
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ return ARMComputeAddress(U->getOperand(0), Addr);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ return ARMComputeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ return ARMComputeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::GetElementPtr: {
- int SavedOffset = Offset;
- unsigned SavedBase = Base;
- int TmpOffset = Offset;
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
// Iterate through the GEP folding the constants into offsets where
// we can.
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Op);
- do {
- Op = Worklist.pop_back_val();
+ for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
- } else if (isa<AddOperator>(Op) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add with a constant operand. Fold the constant.
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
ConstantInt *CI =
- cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
- // Add the other operand back to the work list.
- Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
- } else
- goto unsupported_gep;
- } while (!Worklist.empty());
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
}
}
// Try to grab the base operand now.
- Offset = TmpOffset;
- if (ARMComputeRegOffset(U->getOperand(0), Base, Offset)) return true;
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
// We failed, restore everything and try the other options.
- Offset = SavedOffset;
- Base = SavedBase;
+ Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
const AllocaInst *AI = cast<AllocaInst>(Obj);
- unsigned Reg = TargetMaterializeAlloca(AI);
-
- if (Reg == 0) return false;
-
- Base = Reg;
- return true;
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
}
}
unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
if (Tmp == 0) return false;
- Base = Tmp;
+ Addr.Base.Reg = Tmp;
return true;
}
// Try to get this in a register if nothing else has worked.
- if (Base == 0) Base = getRegForValue(Obj);
- return Base != 0;
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT) {
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
switch (VT.getSimpleVT().SimpleTy) {
default:
assert(false && "Unhandled load/store type!");
+ case MVT::i16:
+ if (isThumb2)
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ else
+ // ARM i16 integer loads/stores handle +/-imm8 offsets.
+ // FIXME: Negative offsets require special handling.
+ if (Addr.Offset > 255 || Addr.Offset < 0)
+ needsLowering = true;
+ break;
case MVT::i1:
case MVT::i8:
- case MVT::i16:
case MVT::i32:
// Integer loads/stores handle 12-bit offsets.
- needsLowering = ((Offset & 0xfff) != Offset);
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
break;
case MVT::f32:
case MVT::f64:
// Floating point operands handle 8-bit offsets.
- needsLowering = ((Offset & 0xff) != Offset);
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
break;
}
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
// Since the offset is too large for the load/store instruction
// get the reg+offset into a register.
if (needsLowering) {
- ARMCC::CondCodes Pred = ARMCC::AL;
- unsigned PredReg = 0;
-
- TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass;
- unsigned BaseReg = createResultReg(RC);
-
- if (!isThumb)
- emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- BaseReg, Base, Offset, Pred, PredReg,
- static_cast<const ARMBaseInstrInfo&>(TII));
- else {
- assert(AFI->isThumb2Function());
- emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- BaseReg, Base, Offset, Pred, PredReg,
- static_cast<const ARMBaseInstrInfo&>(TII));
- }
- Offset = 0;
- Base = BaseReg;
+ Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
+ Addr.Offset = 0;
+ }
+}
+
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags) {
+ // addrmode5 output depends on the selection dag addressing dividing the
+ // offset by 4 that it then later multiplies. Do this here as well.
+ if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
}
+ AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
- unsigned Base, int Offset) {
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
TargetRegisterClass *RC;
- bool isFloat = false;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- // This is mostly going to be Neon/vector support.
- return false;
- case MVT::i16:
- Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ // This is mostly going to be Neon/vector support.
+ default: return false;
+ case MVT::i8:
+ Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12;
RC = ARM::GPRRegisterClass;
break;
- case MVT::i8:
- Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ case MVT::i16:
+ Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH;
RC = ARM::GPRRegisterClass;
break;
case MVT::i32:
- Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
Opc = ARM::VLDRS;
RC = TLI.getRegClassFor(VT);
- isFloat = true;
break;
case MVT::f64:
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
- isFloat = true;
break;
}
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+ // Create the base instruction, then add the operands.
ResultReg = createResultReg(RC);
-
- ARMSimplifyRegOffset(Base, Offset, VT);
-
- // addrmode5 output depends on the selection dag addressing dividing the
- // offset by 4 that it then later multiplies. Do this here as well.
- if (isFloat)
- Offset /= 4;
-
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg)
- .addReg(Base).addImm(Offset));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getType(), VT))
return false;
- // Our register and offset with innocuous defaults.
- unsigned Base = 0;
- int Offset = 0;
-
- // See if we can handle this as Reg + Offset
- if (!ARMComputeRegOffset(I->getOperand(0), Base, Offset))
- return false;
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Base, Offset)) return false;
-
+ if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
- unsigned Base, int Offset) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
unsigned StrOpc;
- bool isFloat = false;
- bool needReg0Op = false;
switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
ARM::GPRRegisterClass);
- unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
SrcReg = Res;
} // Fallthrough here.
case MVT::i8:
- StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ StrOpc = isThumb2 ? ARM::t2STRBi12 : ARM::STRBi12;
break;
case MVT::i16:
- StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
- needReg0Op = true;
+ StrOpc = isThumb2 ? ARM::t2STRHi12 : ARM::STRH;
break;
case MVT::i32:
- StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
StrOpc = ARM::VSTRS;
- isFloat = true;
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
StrOpc = ARM::VSTRD;
- isFloat = true;
break;
}
-
- ARMSimplifyRegOffset(Base, Offset, VT);
-
- // addrmode5 output depends on the selection dag addressing dividing the
- // offset by 4 that it then later multiplies. Do this here as well.
- if (isFloat)
- Offset /= 4;
-
- // FIXME: The 'needReg0Op' bit goes away once STRH is converted to
- // not use the mega-addrmode stuff.
- if (!needReg0Op)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(StrOpc))
- .addReg(SrcReg).addReg(Base).addImm(Offset));
- else
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(StrOpc))
- .addReg(SrcReg).addReg(Base).addReg(0).addImm(Offset));
-
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg, getKillRegState(true));
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
return true;
}
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
- // Yay type legalization
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
return false;
// Get the value to be stored into a register.
SrcReg = getRegForValue(Op0);
- if (SrcReg == 0)
- return false;
+ if (SrcReg == 0) return false;
- // Our register and offset with innocuous defaults.
- unsigned Base = 0;
- int Offset = 0;
-
- // See if we can handle this as Reg + Offset
- if (!ARMComputeRegOffset(I->getOperand(1), Base, Offset))
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Base, Offset)) return false;
-
+ if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
return true;
}
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// Simple branch support.
-
+
// If we can, avoid recomputing the compare - redoing it could lead to wonky
// behavior.
- // TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
- MVT VT;
- const Type *Ty = CI->getOperand(0)->getType();
- if (!isTypeLegal(Ty, VT))
- return false;
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
- return false;
-
- unsigned CmpOpc;
- unsigned CondReg;
- switch (VT.SimpleTy) {
- default: return false;
- // TODO: Verify compares.
- case MVT::f32:
- CmpOpc = ARM::VCMPES;
- CondReg = ARM::FPSCR;
- break;
- case MVT::f64:
- CmpOpc = ARM::VCMPED;
- CondReg = ARM::FPSCR;
- break;
- case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- CondReg = ARM::CPSR;
- break;
+ // Get the compare predicate.
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
}
- // Get the compare predicate.
- ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
// We may not handle every CC for now.
if (ARMPred == ARMCC::AL) return false;
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
-
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
-
- // For floating point we need to move the result to a comparison register
- // that we can then use for branches.
- if (isFloat)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::FMSTAT)));
-
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
}
-
+
unsigned CmpReg = getRegForValue(BI->getCondition());
if (CmpReg == 0) return false;
- // Re-set the flags just in case.
- unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(CmpReg).addImm(0));
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We musn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+ .addReg(CmpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
- .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
-bool ARMFastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
-
- MVT VT;
- const Type *Ty = CI->getOperand(0)->getType();
- if (!isTypeLegal(Ty, VT))
- return false;
+bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt) {
+ Type *Ty = Src1Value->getType();
+ EVT SrcVT = TLI.getValueType(Ty, true);
+ if (!SrcVT.isSimple()) return false;
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ int EncodedImm = 0;
+ bool EncodeImm = false;
+ bool isNegativeImm = false;
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+ if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+ SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ EncodedImm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
+ if (EncodedImm < 0) {
+ isNegativeImm = true;
+ EncodedImm = -EncodedImm;
+ }
+ EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) :
+ (ARM_AM::getSOImmVal(EncodedImm) != -1);
+ }
+ } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+ if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+ if (ConstFP->isZero() && !ConstFP->isNegative())
+ EncodeImm = true;
+ }
+
unsigned CmpOpc;
- unsigned CondReg;
- switch (VT.SimpleTy) {
+ bool isICmp = true;
+ bool needsExt = false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
- CmpOpc = ARM::VCMPES;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = EncodeImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
- CmpOpc = ARM::VCMPED;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = EncodeImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ needsExt = true;
+ // Intentional fall-through.
case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- CondReg = ARM::CPSR;
+ if (isThumb2) {
+ if (!EncodeImm)
+ CmpOpc = ARM::t2CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+ } else {
+ if (!EncodeImm)
+ CmpOpc = ARM::CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+ }
break;
}
- // Get the compare predicate.
- ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
-
- // We may not handle every CC for now.
- if (ARMPred == ARMCC::AL) return false;
+ unsigned SrcReg1 = getRegForValue(Src1Value);
+ if (SrcReg1 == 0) return false;
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
+ unsigned SrcReg2;
+ if (!EncodeImm) {
+ SrcReg2 = getRegForValue(Src2Value);
+ if (SrcReg2 == 0) return false;
+ }
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
+ // We have i1, i8, or i16, we need to either zero extend or sign extend.
+ if (needsExt) {
+ unsigned ResultReg;
+ ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+ if (ResultReg == 0) return false;
+ SrcReg1 = ResultReg;
+ if (!EncodeImm) {
+ ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+ if (ResultReg == 0) return false;
+ SrcReg2 = ResultReg;
+ }
+ }
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
+ if (!EncodeImm) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(SrcReg1).addReg(SrcReg2));
+ } else {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(SrcReg1);
+
+ // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+ if (isICmp)
+ MIB.addImm(EncodedImm);
+ AddOptionalDefs(MIB);
+ }
// For floating point we need to move the result to a comparison register
// that we can then use for branches.
- if (isFloat)
+ if (Ty->isFloatTy() || Ty->isDoubleTy())
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::FMSTAT)));
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+ Type *Ty = CI->getOperand(0)->getType();
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
// Now set a register based on the comparison. Explicitly set the predicates
// here.
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
- TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
: ARM::GPRRegisterClass;
unsigned DestReg = createResultReg(RC);
- Constant *Zero
- = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
+ unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
.addImm(ARMPred).addReg(CondReg);
if (!Subtarget->hasVFP2()) return false;
MVT DstVT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, DstVT))
return false;
- unsigned Op = getRegForValue(I->getOperand(0));
- if (Op == 0) return false;
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0) return false;
+
+ // Handle sign-extension.
+ if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
+ EVT DestVT = MVT::i32;
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, /*isZExt*/ false);
+ if (ResultReg == 0) return false;
+ SrcReg = ResultReg;
+ }
// The conversion routine works on fp-reg to fp-reg and the operand above
// was an integer, move it to the fp registers if possible.
- unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
if (FP == 0) return false;
unsigned Opc;
if (Ty->isFloatTy()) Opc = ARM::VSITOS;
else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
- else return 0;
+ else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
if (!Subtarget->hasVFP2()) return false;
MVT DstVT;
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
if (!isTypeLegal(RetTy, DstVT))
return false;
if (Op == 0) return false;
unsigned Opc;
- const Type *OpTy = I->getOperand(0)->getType();
+ Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
- else return 0;
+ else return false;
// f64->s32 or f32->s32 both need an intermediate f32 reg.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
unsigned Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
- unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned CmpOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
.addReg(CondReg).addImm(1));
unsigned ResultReg = createResultReg(RC);
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op1Reg).addReg(Op2Reg)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
bool ARMFastISel::SelectSDiv(const Instruction *I) {
MVT VT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
return false;
bool ARMFastISel::SelectSRem(const Instruction *I) {
MVT VT;
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
return false;
// operations, but can't figure out how to. Just use the vfp instructions
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
CallingConv::ID CC,
unsigned &NumBytes) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
- unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(AdjStackDown))
.addImm(NumBytes));
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
- // We don't handle NEON parameters yet.
- if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() > 64)
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
return false;
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
+ EVT DestVT = VA.getLocVT();
+ unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
+ /*isZExt*/false);
+ assert (ResultReg != 0 && "Failed to emit a sext");
+ Arg = ResultReg;
break;
}
+ case CCValAssign::AExt:
+ // Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
- break;
- }
- case CCValAssign::AExt: {
- bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
- ArgVT = VA.getLocVT();
+ EVT DestVT = VA.getLocVT();
+ unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
+ /*isZExt*/true);
+ assert (ResultReg != 0 && "Failed to emit a sext");
+ Arg = ResultReg;
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BIT_CONVERT, Arg,
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
/*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg())
- .addReg(Arg);
+ .addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// TODO: We need custom lowering for vector (v2f64) args.
} else {
assert(VA.isMemLoc());
// Need to store on the stack.
- unsigned Base = ARM::SP;
- int Offset = VA.getLocMemOffset();
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
- if (!ARMEmitStore(ArgVT, Arg, Base, Offset)) return false;
+ if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
}
}
return true;
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes) {
// Issue CALLSEQ_END
- unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(AdjStackUp))
.addImm(NumBytes).addImm(0));
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
// Copy all of the result registers out of their specified physreg.
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
EVT CopyVT = RVLocs[0].getValVT();
+
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
unsigned ResultReg = createResultReg(DstRC);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
const Value *RV = Ret->getOperand(0);
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
- // TODO: For now, don't try to handle cases where getLocInfo()
- // says Full but the types don't match.
- if (TLI.getValueType(RV->getType()) != VA.getValVT())
- return false;
- // Make the copy.
unsigned SrcReg = Reg + VA.getValNo();
+ EVT RVVT = TLI.getValueType(RV->getType());
+ EVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DestVT == MVT::i32 && "ARM should always ext to i32");
+
+ bool isZExt = Outs[0].Flags.isZExt();
+ unsigned ResultReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ SrcReg = ResultReg;
+ }
+
+ // Make the copy.
unsigned DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
MRI.addLiveOut(VA.getLocReg());
}
- unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(RetOpc)));
return true;
}
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb2) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
// Handle *simple* calls for now.
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT))
return false;
- // For now we're using BLX etc on the assumption that we have v5t ops.
- if (!Subtarget->hasV5TOps()) return false;
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
unsigned Arg = getRegForValue(Op);
if (Arg == 0) return false;
- const Type *ArgTy = Op->getType();
+ Type *ArgTy = Op->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT)) return false;
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+ // Issue the call, BLr9 for darwin, BL otherwise.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
- if(isThumb)
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if(isThumb2)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addExternalSymbol(TLI.getLibcallName(Call));
else
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
- .addExternalSymbol(TLI.getLibcallName(Call));
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call)));
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
// Can't handle inline asm or worry about intrinsics yet.
if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
- // Only handle global variable Callees that are direct calls.
+ // Only handle global variable Callees.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
+ if (!GV)
return false;
// Check the calling convention.
// TODO: Avoid some calling conventions?
// Let SDISel handle vararg functions.
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
if (FTy->isVarArg())
return false;
// Handle *simple* calls for now.
- const Type *RetTy = I->getType();
+ Type *RetTy = I->getType();
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8 && RetVT != MVT::i1)
return false;
- // For now we're using BLX etc on the assumption that we have v5t ops.
- // TODO: Maybe?
- if (!Subtarget->hasV5TOps()) return false;
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- // FIXME: Only handle *easy* calls for now.
+ // FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
- const Type *ArgTy = (*i)->getType();
+ Type *ArgTy = (*i)->getType();
MVT ArgVT;
- if (!isTypeLegal(ArgTy, ArgVT))
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
+ ArgVT != MVT::i1)
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+ // Issue the call, BLr9 for darwin, BL otherwise.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
- if(isThumb)
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ unsigned CallOpc = ARMSelectCallOp(GV);
+ // Explicitly adding the predicate here.
+ if(isThumb2)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addGlobalAddress(GV, 0, 0);
else
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
- .addGlobalAddress(GV, 0, 0);
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
+}
+
+bool ARMFastISel::SelectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+ bool isZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return 0;
+
+ unsigned Opc;
+ bool isBoolZext = false;
+ if (!SrcVT.isSimple()) return 0;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i16:
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
+ else
+ Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
+ break;
+ case MVT::i8:
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
+ else
+ Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
+ break;
+ case MVT::i1:
+ if (isZExt) {
+ Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ isBoolZext = true;
+ break;
+ }
+ return 0;
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg);
+ if (isBoolZext)
+ MIB.addImm(1);
+ else
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+ return ResultReg;
+}
+bool ARMFastISel::SelectIntExt(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers.
+ // FIXME: We could save an instruction in many cases by special-casing
+ // load instructions.
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
}
// TODO: SoftFP support.
return SelectSelect(I);
case Instruction::Ret:
return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
default: break;
}
return false;
llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
// Completely untested on non-darwin.
const TargetMachine &TM = funcInfo.MF->getTarget();
-
+
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
!DisableARMFastISel)
return new ARMFastISel(funcInfo);
return 0;