X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMFastISel.cpp;h=ea03fb7a6011dfd3318738459378b67a1721a01a;hb=6ce2deacefa5fd2565983d513d07a06d6a8af602;hp=81a93b1ed3ba7fae1cf4dfb46d8d8244572d0dfd;hpb=57b299796685033c87a5414e179b95b5ae7dc8d4;p=oota-llvm.git

diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 81a93b1ed3b..ea03fb7a601 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -37,7 +37,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -182,9 +181,12 @@ class ARMFastISel : public FastISel {
     bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                      bool isZExt, bool allocReg);
-    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+                      unsigned Alignment = 0);
     bool ARMComputeAddress(const Value *Obj, Address &Addr);
     void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+    bool ARMIsMemCpySmall(uint64_t Len);
+    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
     unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
     unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
     unsigned ARMMaterializeInt(const Constant *C, EVT VT);
@@ -195,8 +197,6 @@ class ARMFastISel : public FastISel {
 
     // Call handling routines.
   private:
-    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
-                        unsigned &ResultReg);
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
     bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                          SmallVectorImpl<unsigned> &ArgRegs,
@@ -685,6 +685,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
   return 0;
 }
 
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
   // Don't handle dynamic allocas.
   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -874,9 +876,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
         // Integer loads/stores handle 12-bit offsets.
         needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
         // Handle negative offsets.
-        if (isThumb2)
-          needsLowering = !(needsLowering && Subtarget->hasV6T2Ops() &&
-                            Addr.Offset < 0 && Addr.Offset > -256);
+        if (needsLowering && isThumb2)
+          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+                            Addr.Offset > -256);
       } else {
         // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
         needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
@@ -936,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
     // Now add the rest of the operands.
     MIB.addFrameIndex(FI);
 
-    // ARM halfword load/stores and signed byte loads need an additional operand.
+    // ARM halfword load/stores and signed byte loads need an additional
+    // operand.
     if (useAM3) {
       signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
@@ -949,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
     // Now add the rest of the operands.
     MIB.addReg(Addr.Base.Reg);
 
-    // ARM halfword load/stores and signed byte loads need an additional operand.
+    // ARM halfword load/stores and signed byte loads need an additional
+    // operand.
     if (useAM3) {
       signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
       MIB.addReg(0);
@@ -1052,7 +1056,8 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
   return true;
 }
 
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+                               unsigned Alignment) {
   unsigned StrOpc;
   bool useAM3 = false;
   switch (VT.getSimpleVT().SimpleTy) {
@@ -1101,9 +1106,23 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
     case MVT::f32:
       if (!Subtarget->hasVFP2()) return false;
       StrOpc = ARM::VSTRS;
+      // Unaligned stores need special handling.
+      if (Alignment && Alignment < 4) {
+        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                TII.get(ARM::VMOVRS), MoveReg)
+                        .addReg(SrcReg));
+        SrcReg = MoveReg;
+        VT = MVT::i32;
+        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+      }
       break;
     case MVT::f64:
       if (!Subtarget->hasVFP2()) return false;
+      // FIXME: Unaligned stores need special handling.
+      if (Alignment && Alignment < 8) {
+        return false;
+      }
       StrOpc = ARM::VSTRD;
       break;
   }
@@ -1113,7 +1132,7 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
   // Create the base instruction, then add the operands.
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                     TII.get(StrOpc))
-                            .addReg(SrcReg, getKillRegState(true));
+                            .addReg(SrcReg);
   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
   return true;
 }
@@ -1140,7 +1159,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
   if (!ARMComputeAddress(I->getOperand(1), Addr))
     return false;
 
-  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+    return false;
   return true;
 }
@@ -1302,6 +1322,8 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   int Imm = 0;
   bool UseImm = false;
   bool isNegativeImm = false;
+  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+  // Thus, Src1Value may be a ConstantInt, but we're missing it.
   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
     if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
         SrcVT == MVT::i1) {
@@ -1357,7 +1379,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   unsigned SrcReg1 = getRegForValue(Src1Value);
   if (SrcReg1 == 0) return false;
 
-  unsigned SrcReg2;
+  unsigned SrcReg2 = 0;
   if (!UseImm) {
     SrcReg2 = getRegForValue(Src2Value);
     if (SrcReg2 == 0) return false;
@@ -1574,7 +1596,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
              (ARM_AM::getSOImmVal(Imm) != -1);
   }
 
-  unsigned Op2Reg;
+  unsigned Op2Reg = 0;
   if (!UseImm) {
     Op2Reg = getRegForValue(I->getOperand(2));
     if (Op2Reg == 0) return false;
@@ -1667,12 +1689,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
   if (isFloat && !Subtarget->hasVFP2())
     return false;
 
-  unsigned Op1 = getRegForValue(I->getOperand(0));
-  if (Op1 == 0) return false;
-
-  unsigned Op2 = getRegForValue(I->getOperand(1));
-  if (Op2 == 0) return false;
-
   unsigned Opc;
   bool is64bit = VT == MVT::f64 || VT == MVT::i64;
   switch (ISDOpcode) {
@@ -1687,6 +1703,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
       Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
   }
 
+  unsigned Op1 = getRegForValue(I->getOperand(0));
+  if (Op1 == 0) return false;
+
+  unsigned Op2 = getRegForValue(I->getOperand(1));
+  if (Op2 == 0) return false;
+
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(Opc), ResultReg)
@@ -1697,18 +1719,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
 
 // Call Handling Code
 
-bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
-                                 EVT SrcVT, unsigned &ResultReg) {
-  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
-                           Src, /*TODO: Kill=*/false);
-
-  if (RR != 0) {
-    ResultReg = RR;
-    return true;
-  } else
-    return false;
-}
-
 // This is largely taken directly from CCAssignFnForNode - we don't support
 // varargs in FastISel so that part has been removed.
 // TODO: We may not support all of this.
@@ -1725,7 +1735,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
   // Use target triple & subtarget features to do actual dispatch.
   if (Subtarget->isAAPCS_ABI()) {
     if (Subtarget->hasVFP2() &&
-        FloatABIType == FloatABI::Hard)
+        TM.Options.FloatABIType == FloatABI::Hard)
       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
     else
       return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1774,21 +1784,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
     switch (VA.getLocInfo()) {
       case CCValAssign::Full: break;
       case CCValAssign::SExt: {
-        EVT DestVT = VA.getLocVT();
+        MVT DestVT = VA.getLocVT();
         unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
                                            /*isZExt*/false);
         assert (ResultReg != 0 && "Failed to emit a sext");
         Arg = ResultReg;
+        ArgVT = DestVT;
         break;
       }
       case CCValAssign::AExt:
         // Intentional fall-through.  Handle AExt and ZExt.
       case CCValAssign::ZExt: {
-        EVT DestVT = VA.getLocVT();
+        MVT DestVT = VA.getLocVT();
         unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
                                            /*isZExt*/true);
         assert (ResultReg != 0 && "Failed to emit a sext");
         Arg = ResultReg;
+        ArgVT = DestVT;
         break;
       }
       case CCValAssign::BCvt: {
@@ -2117,9 +2129,6 @@ bool ARMFastISel::SelectCall(const Instruction *I,
     if (IntrMemName && e-i <= 2)
       break;
 
-    unsigned Arg = getRegForValue(*i);
-    if (Arg == 0)
-      return false;
     ISD::ArgFlagsTy Flags;
     unsigned AttrInd = i - CS.arg_begin() + 1;
     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -2139,6 +2148,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
     if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
         ArgVT != MVT::i1)
       return false;
+
+    unsigned Arg = getRegForValue(*i);
+    if (Arg == 0)
+      return false;
+
     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
     Flags.setOrigAlign(OriginalAlignment);
@@ -2193,18 +2207,70 @@ bool ARMFastISel::SelectCall(const Instruction *I,
   return true;
 }
 
+bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
+  return Len <= 16;
+}
+
+bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) {
+  // Make sure we don't bloat code by inlining very large memcpy's.
+  if (!ARMIsMemCpySmall(Len))
+    return false;
+
+  // We don't care about alignment here since we just emit integer accesses.
+  while (Len) {
+    MVT VT;
+    if (Len >= 4)
+      VT = MVT::i32;
+    else if (Len >= 2)
+      VT = MVT::i16;
+    else {
+      assert(Len == 1);
+      VT = MVT::i8;
+    }
+
+    bool RV;
+    unsigned ResultReg;
+    RV = ARMEmitLoad(VT, ResultReg, Src);
+    assert (RV == true && "Should be able to handle this load.");
+    RV = ARMEmitStore(VT, ResultReg, Dest);
+    assert (RV == true && "Should be able to handle this store.");
+
+    unsigned Size = VT.getSizeInBits()/8;
+    Len -= Size;
+    Dest.Offset += Size;
+    Src.Offset += Size;
+  }
+
+  return true;
+}
+
 bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
   // FIXME: Handle more intrinsics.
   switch (I.getIntrinsicID()) {
   default: return false;
   case Intrinsic::memcpy:
   case Intrinsic::memmove: {
-    // FIXME: Small memcpy/memmove's are common enough that we want to do them
-    // without a call if possible.
     const MemTransferInst &MTI = cast<MemTransferInst>(I);
     // Don't handle volatile.
     if (MTI.isVolatile())
       return false;
+
+    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
+    // we would emit dead code because we don't currently handle memmoves.
+    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+      // Small memcpy's are common enough that we want to do them without a call
+      // if possible.
+      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+      if (ARMIsMemCpySmall(Len)) {
+        Address Dest, Src;
+        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+            !ARMComputeAddress(MTI.getRawSource(), Src))
+          return false;
+        if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+          return true;
+      }
+    }
 
     if (!MTI.getLength()->getType()->isIntegerTy(32))
       return false;
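
Note (illustrative, not part of the patch): the chunking strategy that
ARMTryEmitSmallMemCpy applies -- widest integer access first, narrowing as the
remainder shrinks -- can be sketched in plain C++ against ordinary memory. This
is a stand-in for the fast-isel lowering, not the lowering itself; the names
smallMemCpy and kInlineThreshold are hypothetical.

#include <cstdint>
#include <cstring>

// Mirrors ARMIsMemCpySmall's 16-byte cutoff.
static const uint64_t kInlineThreshold = 16;

// Copy Len bytes the way the patch lowers a small memcpy: i32 chunks while
// four or more bytes remain, then an i16 chunk, then an i8 chunk.
static bool smallMemCpy(uint8_t *Dest, const uint8_t *Src, uint64_t Len) {
  if (Len > kInlineThreshold)
    return false; // Too big; the real code falls back to a libcall.

  while (Len) {
    unsigned Size;
    if (Len >= 4)
      Size = 4;      // MVT::i32 load/store pair
    else if (Len >= 2)
      Size = 2;      // MVT::i16
    else
      Size = 1;      // MVT::i8

    uint32_t Tmp = 0;
    std::memcpy(&Tmp, Src, Size);  // the "load"  (ARMEmitLoad)
    std::memcpy(Dest, &Tmp, Size); // the "store" (ARMEmitStore)

    Len -= Size;
    Dest += Size;
    Src += Size;
  }
  return true;
}

With the 16-byte cutoff, the worst case is Len == 15: three i32 chunks, one
i16, and one i8 -- five load/store pairs, which at -O0 is cheaper than
materializing the three memcpy arguments and issuing a call.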
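
A second note, on the ARMSimplifyAddress hunk: the old code tested isThumb2
unconditionally, so an already-encodable offset (needsLowering == false) was
turned into !(false && ...) == true and lowered for no reason. A minimal sketch
of the corrected predicate, with a hypothetical helper name (the real code
mutates Addr.Offset in place rather than returning a flag):

#include <cstdint>

// Returns true when the offset cannot be encoded directly in the
// load/store and must be folded into the base register instead.
static bool offsetNeedsLowering(int64_t Offset, bool isThumb2, bool hasV6T2) {
  // Integer loads/stores take a 12-bit unsigned immediate (0..4095).
  bool needsLowering = ((Offset & 0xfff) != Offset);
  // Thumb-2 (v6T2 and later) also has a negative imm8 form, so offsets
  // -255..-1 are encodable as well -- but only reconsider when the i12
  // check already failed, which is exactly what the patch fixes.
  if (needsLowering && isThumb2)
    needsLowering = !(hasV6T2 && Offset < 0 && Offset > -256);
  return needsLowering;
}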