X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=22efa48709e97343d555a1c790d10058b9639f2b;hb=09aa3f0ef35d9241c92439d74b8d5e9a81d814c2;hp=3111ff598b6349f187ab9e0ad9d8f7409d2e5507;hpb=75f0288b7dff1fcb24d9d61ea3e9547ba21382e4;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3111ff598b6..22efa48709e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/Function.h" #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" +#include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -58,13 +59,6 @@ EnableARMTailCalls("arm-tail-calls", cl::Hidden, cl::desc("Generate tail calls (TEMPORARY OPTION)."), cl::init(false)); -// This option should go away when Machine LICM is smart enough to hoist a -// reg-to-reg VDUP. -static cl::opt -EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden, - cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."), - cl::init(false)); - static cl::opt EnableARMLongCalls("arm-long-calls", cl::Hidden, cl::desc("Generate calls via indirect call instructions"), @@ -75,11 +69,6 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); -static cl::opt -EnableARMCodePlacement("arm-code-placement", cl::Hidden, - cl::desc("Enable code placement pass for ARM"), - cl::init(false)); - void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { @@ -95,8 +84,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); - if (ElemTy == MVT::i8 || ElemTy == MVT::i16) - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); if (ElemTy != MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); @@ -115,6 +103,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction(VT.getSimpleVT(), + (MVT::SimpleValueType)InnerVT, Expand); } setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); @@ -245,13 +237,157 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - // Libcalls should use the AAPCS base standard ABI, even if hard float - // is in effect, as per the ARM RTABI specification, section 4.1.2. 
if (Subtarget->isAAPCS_ABI()) { - for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { - setLibcallCallingConv(static_cast(i), - CallingConv::ARM_AAPCS); - } + // Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); + setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); + setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); + setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); + setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); + setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); + setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); + setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); + setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); + setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); + setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); + setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); + setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); + setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); + setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); + setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); + setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); + setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); + setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); + setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); + setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); + setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); + setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); + setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); + setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); + setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); + setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); + setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); + setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); + setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); + setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); + setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); + setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); + setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); + setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); + setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); + setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); + setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); + setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); + setCmpLibcallCC(RTLIB::O_F32, 
ISD::SETEQ); + setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); + setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); + setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); + setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); + setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); + setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); + setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); + setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); + setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); + + // Integer division functions + // RTABI chapter 4.3.1 + setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); + setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); + setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); } if (Subtarget->isThumb1Only()) @@ -327,6 +463,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::BUILD_VECTOR); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); } computeRegisterProperties(); @@ -383,7 +520,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32, Expand); // These are expanded into libcalls. - if (!Subtarget->hasDivide()) { + if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { // v7M has a hardware divider setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); @@ -416,7 +553,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. 
if (Subtarget->hasDataBarrier() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) { + (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { // membarrier needs custom lowering; the rest are legal and handled // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); @@ -460,6 +597,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -470,7 +609,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. - setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); } @@ -479,6 +618,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->isTargetDarwin()) { setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom); } setOperationAction(ISD::SETCC, MVT::i32, Expand); @@ -533,8 +673,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasV6T2Ops()) + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) setTargetDAGCombine(ISD::OR); + if (Subtarget->hasNEON()) + setTargetDAGCombine(ISD::AND); setStackPointerRegisterToSaveRestore(ARM::SP); @@ -549,8 +691,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // are at least 4 bytes aligned. 
setMinStackArgumentAlignment(4); - if (EnableARMCodePlacement) - benefitFromCodePlacementOpt = true; + benefitFromCodePlacementOpt = true; } std::pair @@ -598,7 +739,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; - case ARMISD::AND: return "ARMISD::AND"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; @@ -619,11 +759,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; - case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; - case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; + case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; @@ -632,7 +773,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; - case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER"; + case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; + + case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -679,6 +822,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; case ARMISD::BFI: return "ARMISD::BFI"; + case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; + case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; + case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; + case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; + case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; } } @@ -721,6 +869,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; if (VT.isFloatingPoint() || VT.isVector()) return Sched::Latency; } @@ -732,24 +882,28 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // is not available. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - if (TID.mayLoad()) - return Sched::Latency; - if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2) + if (TID.getNumDefs() == 0) + return Sched::RegPressure; + if (!Itins->isEmpty() && + Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) return Sched::Latency; + return Sched::RegPressure; } unsigned ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { + const TargetFrameInfo *TFI = MF.getTarget().getFrameInfo(); + switch (RC->getID()) { default: return 0; case ARM::tGPRRegClassID: - return RegInfo->hasFP(MF) ? 4 : 5; + return TFI->hasFP(MF) ? 4 : 5; case ARM::GPRRegClassID: { - unsigned FP = RegInfo->hasFP(MF) ? 1 : 0; + unsigned FP = TFI->hasFP(MF) ? 1 : 0; return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
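
Note on the __aeabi_dcmp* / __aeabi_fcmp* registrations in the constructor hunks above: the RTABI comparison helpers return a plain int, so one routine serves two predicates depending on how its result is tested (SETNE for the ordered form, SETEQ for the negated/unordered form), which is exactly what the setCmpLibcallCC calls record. The following is an illustrative C++ stand-in, not the real runtime routine or LLVM code.

#include <cmath>
#include <cstdio>

// Stand-in for __aeabi_dcmpeq: returns nonzero iff a and b compare equal;
// an unordered comparison (either operand NaN) returns 0.
static int aeabi_dcmpeq_stub(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return 0;
  return a == b ? 1 : 0;
}

int main() {
  double One = 1.0, NaN = std::nan("");
  // OEQ_F64: the libcall result is tested with SETNE (result != 0).
  printf("OEQ(1.0, 1.0) = %d\n", aeabi_dcmpeq_stub(One, One) != 0);
  // UNE_F64: the same libcall, but the result is tested with SETEQ.
  printf("UNE(1.0, NaN) = %d\n", aeabi_dcmpeq_stub(One, NaN) == 0);
  return 0;
}
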
@@ -822,23 +976,29 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::C: case CallingConv::Fast: + if (Subtarget->hasVFP2() && !isVarArg) { + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); + // For AAPCS ABI targets, just use VFP variant of the calling convention. + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + } + // Fallthrough + case CallingConv::C: { // Use target triple & subtarget features to do actual dispatch. - if (Subtarget->isAAPCS_ABI()) { - if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); - else - return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); - } else - return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + else if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard && !isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + } case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: - return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); } } @@ -905,7 +1065,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val); + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); break; } @@ -928,7 +1088,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, - NULL, 0, NULL, 0); + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerMemOpCallTo - Store the argument to the stack. 
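
The CCAssignFnForNode hunk above changes the dispatch for CallingConv::Fast so that it prefers the VFP argument-passing variants whenever VFP2 is available and the call is not variadic, and only otherwise falls back to the CallingConv::C handling. A plain C++ sketch of that decision table follows; the enum and struct names are illustrative, not LLVM types.

#include <cstdio>

enum class CC { C, Fast, ARM_AAPCS, ARM_AAPCS_VFP, ARM_APCS };
enum class Fn { APCS, FastAPCS, AAPCS, AAPCS_VFP };

struct Subtarget { bool AAPCS_ABI; bool HasVFP2; bool HardFloatABI; };

static Fn ccAssignFn(CC CallConv, bool isVarArg, const Subtarget &ST) {
  // Fast calls use the VFP variants when possible; otherwise they are
  // handled exactly like CallingConv::C below.
  if (CallConv == CC::Fast && ST.HasVFP2 && !isVarArg)
    return ST.AAPCS_ABI ? Fn::AAPCS_VFP : Fn::FastAPCS;

  switch (CallConv) {
  case CC::Fast:
  case CC::C:
    if (!ST.AAPCS_ABI)
      return Fn::APCS;
    if (ST.HasVFP2 && ST.HardFloatABI && !isVarArg)
      return Fn::AAPCS_VFP;
    return Fn::AAPCS;
  case CC::ARM_AAPCS_VFP: return Fn::AAPCS_VFP;
  case CC::ARM_AAPCS:     return Fn::AAPCS;
  case CC::ARM_APCS:      return Fn::APCS;
  }
  return Fn::APCS; // unreachable; keeps compilers quiet
}

int main() {
  Subtarget ST{/*AAPCS_ABI=*/true, /*HasVFP2=*/true, /*HardFloatABI=*/false};
  printf("fastcc -> %d (AAPCS_VFP)\n", (int)ccAssignFn(CC::Fast, false, ST));
  printf("ccc    -> %d (AAPCS)\n", (int)ccAssignFn(CC::C, false, ST));
  return 0;
}
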
@@ -941,11 +1101,11 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - if (Flags.isByVal()) { + if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - } + return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset, + MachinePointerInfo::getStack(LocMemOffset), false, false, 0); } @@ -1053,7 +1213,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } @@ -1153,7 +1313,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); @@ -1167,7 +1327,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { @@ -1189,13 +1349,19 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); - } else - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + } else { + // On ELF targets for PIC code, direct calls should go through the PLT + unsigned OpFlags = 0; + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; bool isStub = Subtarget->isTargetDarwin() && @@ -1211,13 +1377,19 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); - } else - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } else { + unsigned OpFlags = 0; + // On ELF targets for PIC code, direct calls should go through the PLT + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); + } } // FIXME: handle tail calls differently. @@ -1347,30 +1519,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // whether LR is going to be used. 
Probably the right approach is to // generate the tail call here and turn it back into CALL/RET in // emitEpilogue if LR is used. - if (Subtarget->isThumb1Only()) - return false; - - // For the moment, we can only do this to functions defined in this - // compilation, or to indirect calls. A Thumb B to an ARM function, - // or vice versa, is not easily fixed up in the linker unlike BL. - // (We could do this by loading the address of the callee into a register; - // that is an extra instruction over the direct call and burns a register - // as well, so is not likely to be a win.) - - // It might be safe to remove this restriction on non-Darwin. // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, // but we need to make sure there are enough registers; the only valid // registers are the 4 used for parameters. We don't currently do this // case. - if (isa(Callee)) - return false; - - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - const GlobalValue *GV = G->getGlobal(); - if (GV->isDeclaration() || GV->isWeakForLinker()) - return false; - } + if (Subtarget->isThumb1Only()) + return false; // If the calling conventions do not match, then we'd better make sure the // results are returned in the same way as what the caller expects. @@ -1498,7 +1653,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } @@ -1548,6 +1703,61 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return result; } +bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { + if (N->getNumValues() != 1) + return false; + if (!N->hasNUsesOfValue(1, 0)) + return false; + + unsigned NumCopies = 0; + SDNode* Copies[2]; + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() == ISD::CopyToReg) { + Copies[NumCopies++] = Use; + } else if (Use->getOpcode() == ARMISD::VMOVRRD) { + // f64 returned in a pair of GPRs. + for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != ISD::CopyToReg) + return false; + Copies[UI.getUse().getResNo()] = *UI; + ++NumCopies; + } + } else if (Use->getOpcode() == ISD::BITCAST) { + // f32 returned in a single GPR. + if (!Use->hasNUsesOfValue(1, 0)) + return false; + Use = *Use->use_begin(); + if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) + return false; + Copies[NumCopies++] = Use; + } else { + return false; + } + + if (NumCopies != 1 && NumCopies != 2) + return false; + + bool HasRet = false; + for (unsigned i = 0; i < NumCopies; ++i) { + SDNode *Copy = Copies[i]; + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() == ISD::CopyToReg) { + SDNode *Use = *UI; + if (Use == Copies[0] || Use == Copies[1]) + continue; + return false; + } + if (UI->getOpcode() != ARMISD::RET_FLAG) + return false; + HasRet = true; + } + } + + return HasRet; +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. 
It has to be wrapped because otherwise @@ -1595,7 +1805,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); if (RelocM == Reloc::Static) return Result; @@ -1615,11 +1825,11 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, "tlsgd", true); + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue Chain = Argument.getValue(1); @@ -1662,11 +1872,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, "gottpoff", true); + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); Chain = Offset.getValue(1); @@ -1674,15 +1884,15 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } else { // local exec model - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } @@ -1714,20 +1924,19 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); + new ARMConstantPoolValue(GV, UseGOTOFF ? 
ARMCP::GOTOFF : ARMCP::GOT); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0, - false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); return Result; } else { // If we have T2 ops, we can materialize the address directly via movt/movw @@ -1739,7 +1948,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } } @@ -1767,7 +1976,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue Chain = Result.getValue(1); @@ -1777,8 +1986,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0, + Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), false, false, 0); return Result; @@ -1800,12 +2008,20 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } +SDValue +ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) + const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -1849,7 +2065,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); if (RelocM == Reloc::PIC_) { @@ -1864,21 +2080,55 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { DebugLoc dl = Op.getDebugLoc(); - SDValue Op5 = Op.getOperand(5); - unsigned isDeviceBarrier = cast(Op5)->getZExtValue(); - // Some subtargets which have dmb and dsb instructions can handle barriers - // directly. Some ARMv6 cpus can support them with the help of mcr - // instruction. 
Thumb1 and pre-v6 ARM mode use a libcall instead and should - // never get here. - unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; - if (Subtarget->hasDataBarrier()) - return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); - else { - assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), + return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); } + + SDValue Op5 = Op.getOperand(5); + bool isDeviceBarrier = cast(Op5)->getZExtValue() != 0; + unsigned isLL = cast(Op.getOperand(1))->getZExtValue(); + unsigned isLS = cast(Op.getOperand(2))->getZExtValue(); + bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); + + ARM_MB::MemBOpt DMBOpt; + if (isDeviceBarrier) + DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; + else + DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(DMBOpt, MVT::i32)); +} + +static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // ARM pre v5TE and Thumb1 does not have preload instructions. + if (!(Subtarget->isThumb2() || + (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) + // Just preserve the chain. + return Op.getOperand(0); + + DebugLoc dl = Op.getDebugLoc(); + unsigned isRead = ~cast(Op.getOperand(2))->getZExtValue() & 1; + if (!isRead && + (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) + // ARMv7 with MP extension has PLDW. + return Op.getOperand(0); + + if (Subtarget->isThumb()) + // Invert the bits. + isRead = ~isRead & 1; + unsigned isData = Subtarget->isThumb() ? 0 : 1; + + // Currently there is no intrinsic that matches pli. + return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), + DAG.getConstant(isData, MVT::i32)); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1891,8 +2141,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, - false, false, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } SDValue @@ -1920,7 +2170,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. 
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); @@ -1974,7 +2224,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], @@ -2015,7 +2265,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); break; case CCValAssign::SExt: ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, @@ -2043,7 +2293,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } @@ -2069,7 +2319,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, AFI->setVarArgsFrameIndex( MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize, - true)); + false)); SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy()); @@ -2085,8 +2335,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), - 0, false, false, 0); + MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -2299,8 +2549,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (LoadSDNode *Ld = dyn_cast(Op)) return DAG.getLoad(MVT::i32, Op.getDebugLoc(), - Ld->getChain(), Ld->getBasePtr(), - Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); @@ -2319,7 +2568,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), Ld->getChain(), Ptr, - Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); @@ -2329,7 +2578,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, PtrType, Ptr, DAG.getConstant(4, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), Ld->getChain(), NewPtr, - Ld->getSrcValue(), Ld->getSrcValueOffset() + 4, + Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), NewAlign); return; @@ -2454,14 +2703,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, + MachinePointerInfo::getJumpTable(), false, false, 0); Chain = 
Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, false, false, 0); + MachinePointerInfo::getJumpTable(), false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -2482,7 +2731,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { break; } Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -2501,7 +2750,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { break; } - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); return DAG.getNode(Opc, dl, VT, Op); } @@ -2533,7 +2782,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ SDValue Offset = DAG.getConstant(4, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - NULL, 0, false, false, 0); + MachinePointerInfo(), false, false, 0); } // Return LR, which contains the return address. Mark it an implicit live-in. @@ -2552,17 +2801,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { ? ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), false, false, 0); return FrameAddr; } -/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to +/// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 /// operand type is illegal (e.g., v2f32 for a target that doesn't support /// vectors), since the legalizer won't know what to do with that. -static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { +static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); @@ -2572,7 +2822,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && - "ExpandBIT_CONVERT called for non-i64 type"); + "ExpandBITCAST called for non-i64 type"); // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { @@ -2580,7 +2830,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, + return DAG.getNode(ISD::BITCAST, dl, DstVT, DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } @@ -2607,7 +2857,7 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); EVT VmovVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two @@ -2715,33 +2965,40 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); + if (!VT.isVector()) + return SDValue(); + // Lower vector shifts on NEON to use VSHL. - if (VT.isVector()) { - assert(ST->hasNEON() && "unexpected vector shift"); - - // Left shifts translate directly to the vshiftu intrinsic. - if (N->getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), - N->getOperand(0), N->getOperand(1)); - - assert((N->getOpcode() == ISD::SRA || - N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); - - // NEON uses the same intrinsics for both left and right shifts. For - // right shifts, the shift amounts are negative, so negate the vector of - // shift amounts. - EVT ShiftVT = N->getOperand(1).getValueType(); - SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, - getZeroVector(ShiftVT, DAG, dl), - N->getOperand(1)); - Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? - Intrinsic::arm_neon_vshifts : - Intrinsic::arm_neon_vshiftu); + assert(ST->hasNEON() && "unexpected vector shift"); + + // Left shifts translate directly to the vshiftu intrinsic. + if (N->getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(vshiftInt, MVT::i32), - N->getOperand(0), NegatedCount); - } + DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), + N->getOperand(0), N->getOperand(1)); + + assert((N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); + + // NEON uses the same intrinsics for both left and right shifts. For + // right shifts, the shift amounts are negative, so negate the vector of + // shift amounts. + EVT ShiftVT = N->getOperand(1).getValueType(); + SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, + getZeroVector(ShiftVT, DAG, dl), + N->getOperand(1)); + Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? + Intrinsic::arm_neon_vshifts : + Intrinsic::arm_neon_vshiftu); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(vshiftInt, MVT::i32), + N->getOperand(0), NegatedCount); +} + +static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) @@ -2853,13 +3110,13 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { AndOp = Op1; // Ignore bitconvert. 
- if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) + if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { Opc = ARMISD::VTST; - Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); - Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); + Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); + Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); Invert = !Invert; } } @@ -2868,7 +3125,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); - SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + // If one of the operands is a constant vector zero, attempt to fold the + // comparison to a specialized compare-against-zero form. + SDValue SingleOp; + if (ISD::isBuildVectorAllZeros(Op1.getNode())) + SingleOp = Op0; + else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + if (Opc == ARMISD::VCGE) + Opc = ARMISD::VCLEZ; + else if (Opc == ARMISD::VCGT) + Opc = ARMISD::VCLTZ; + SingleOp = Op1; + } + + SDValue Result; + if (SingleOp.getNode()) { + switch (Opc) { + case ARMISD::VCEQ: + Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; + case ARMISD::VCGE: + Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; + case ARMISD::VCLEZ: + Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; + case ARMISD::VCGT: + Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; + case ARMISD::VCLTZ: + Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; + default: + Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + } + } else { + Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + } if (Invert) Result = DAG.getNOT(dl, Result, VT); @@ -2881,7 +3169,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, - EVT &VT, bool is128Bits, bool isVMOV) { + EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a @@ -2894,7 +3182,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, switch (SplatBitSize) { case 8: - if (!isVMOV) + if (type != VMOVModImm) return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); @@ -2951,6 +3239,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, break; } + // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC + if (type == OtherModImm) return SDValue(); + if ((SplatBits & ~0xffff) == 0 && ((SplatBits | SplatUndef) & 0xff) == 0xff) { // Value = 0x0000nnff: Op=x, Cmode=1100. @@ -2977,7 +3268,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return SDValue(); case 64: { - if (!isVMOV) + if (type != VMOVModImm) return SDValue(); // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
uint64_t BitMask = 0xff; @@ -3246,10 +3537,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, - DAG, VmovVT, VT.is128BitVector(), true); + DAG, VmovVT, VT.is128BitVector(), + VMOVModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Try an immediate VMVN. @@ -3257,10 +3549,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, ((1LL << SplatBitSize) - 1)); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, - DAG, VmovVT, VT.is128BitVector(), false); + DAG, VmovVT, VT.is128BitVector(), + VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } } } @@ -3294,26 +3587,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EnableARMVDUPsplat) { - // Use VDUP for non-constant splats. For f32 constant splats, reduce to - // i32 and try again. - if (usesOnlyOneValue && EltSize <= 32) { - if (!isConstant) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); - if (VT.getVectorElementType().isFloatingPoint()) { - SmallVector Ops; - for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, - Op.getOperand(i))); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], - NumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - LowerBUILD_VECTOR(Val, DAG, ST)); - } - SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + // Use VDUP for non-constant splats. For f32 constant splats, reduce to + // i32 and try again. + if (usesOnlyOneValue && EltSize <= 32) { + if (!isConstant) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (VT.getVectorElementType().isFloatingPoint()) { + SmallVector Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, + Op.getOperand(i))); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } // If all elements are constants and the case above didn't get hit, fall back @@ -3322,12 +3614,6 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (isConstant) return SDValue(); - if (!EnableARMVDUPsplat) { - // Use VDUP for non-constant splats. - if (usesOnlyOneValue && EltSize <= 32) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); - } - // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands // will be legalized. 
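
For the isNEONModifiedImm / LowerBUILD_VECTOR changes above, here is a standalone sketch of the 32-bit splat values that qualify for a single immediate VMOV: one non-zero byte in any position, or the 0x0000nnff / 0x00nnffff forms handled by cmode 1100/1101 (the forms the new OtherModImm parameter excludes for VORR/VBIC). This only illustrates the encoding rules; the function name is made up and this is not the LLVM helper.

#include <cstdint>
#include <cstdio>

static bool isSingleVMOVImm32(uint32_t V) {
  // A single (possibly zero) byte in any of the four byte positions:
  // cmode 0000/0010/0100/0110.
  for (int Shift = 0; Shift < 32; Shift += 8)
    if ((V & ~(0xffu << Shift)) == 0)
      return true;
  // 0x0000nnff: cmode 1100.
  if ((V & ~0xff00u) == 0x00ffu)
    return true;
  // 0x00nnffff: cmode 1101.
  if ((V & ~0xff0000u) == 0xffffu)
    return true;
  return false;
}

int main() {
  printf("0x00ab0000 -> %d\n", isSingleVMOVImm32(0x00ab0000)); // 1
  printf("0x0000abff -> %d\n", isSingleVMOVImm32(0x0000abff)); // 1
  printf("0x12345678 -> %d\n", isSingleVMOVImm32(0x12345678)); // 0
  return 0;
}
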
@@ -3338,9 +3624,9 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); + Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } return SDValue(); @@ -3561,8 +3847,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); + V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) @@ -3574,21 +3860,26 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { MVT::i32))); } SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } return SDValue(); } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - SDValue Vec = Op.getOperand(0); + // EXTRACT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(1); - assert(VT == MVT::i32 && - Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && - "unexpected type for custom-lowering vector extract"); - return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + if (!isa(Lane)) + return SDValue(); + + SDValue Vec = Op.getOperand(0); + if (Op.getValueType() == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + } + + return Op; } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -3602,25 +3893,123 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue Op1 = Op.getOperand(1); if (Op0.getOpcode() != ISD::UNDEF) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), + DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), DAG.getIntPtrConstant(0)); if (Op1.getOpcode() != ISD::UNDEF) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), + DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), DAG.getIntPtrConstant(1)); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); } -/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or -/// an extending load, return the unextended value. +/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each +/// element has been zero/sign-extended, depending on the isSigned parameter, +/// from an integer type half its size. +static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, + bool isSigned) { + // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 
+ EVT VT = N->getValueType(0); + if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { + SDNode *BVN = N->getOperand(0).getNode(); + if (BVN->getValueType(0) != MVT::v4i32 || + BVN->getOpcode() != ISD::BUILD_VECTOR) + return false; + unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned HiElt = 1 - LoElt; + ConstantSDNode *Lo0 = dyn_cast(BVN->getOperand(LoElt)); + ConstantSDNode *Hi0 = dyn_cast(BVN->getOperand(HiElt)); + ConstantSDNode *Lo1 = dyn_cast(BVN->getOperand(LoElt+2)); + ConstantSDNode *Hi1 = dyn_cast(BVN->getOperand(HiElt+2)); + if (!Lo0 || !Hi0 || !Lo1 || !Hi1) + return false; + if (isSigned) { + if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && + Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) + return true; + } else { + if (Hi0->isNullValue() && Hi1->isNullValue()) + return true; + } + return false; + } + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *Elt = N->getOperand(i).getNode(); + if (ConstantSDNode *C = dyn_cast(Elt)) { + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + unsigned HalfSize = EltSize / 2; + if (isSigned) { + int64_t SExtVal = C->getSExtValue(); + if ((SExtVal >> HalfSize) != (SExtVal >> EltSize)) + return false; + } else { + if ((C->getZExtValue() >> HalfSize) != 0) + return false; + } + continue; + } + return false; + } + + return true; +} + +/// isSignExtended - Check if a node is a vector value that is sign-extended +/// or a constant BUILD_VECTOR with sign-extended elements. +static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) + return true; + if (isExtendedBUILD_VECTOR(N, DAG, true)) + return true; + return false; +} + +/// isZeroExtended - Check if a node is a vector value that is zero-extended +/// or a constant BUILD_VECTOR with zero-extended elements. +static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) + return true; + if (isExtendedBUILD_VECTOR(N, DAG, false)) + return true; + return false; +} + +/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending +/// load, or BUILD_VECTOR with extended elements, return the unextended value. static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) return N->getOperand(0); - LoadSDNode *LD = cast(N); - return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), - LD->getBasePtr(), LD->getSrcValue(), - LD->getSrcValueOffset(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + if (LoadSDNode *LD = dyn_cast(N)) + return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will + // have been legalized as a BITCAST from v4i32. + if (N->getOpcode() == ISD::BITCAST) { + SDNode *BVN = N->getOperand(0).getNode(); + assert(BVN->getOpcode() == ISD::BUILD_VECTOR && + BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); + unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, + BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); + } + // Construct a new BUILD_VECTOR with elements truncated to half the size. 
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); + EVT VT = N->getValueType(0); + unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; + unsigned NumElts = VT.getVectorNumElements(); + MVT TruncVT = MVT::getIntegerVT(EltSize); + SmallVector Ops; + for (unsigned i = 0; i != NumElts; ++i) { + ConstantSDNode *C = cast(N->getOperand(i)); + const APInt &CInt = C->getAPIntValue(); + Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -3631,19 +4020,16 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; - if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) && - (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) { + if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG)) NewOpc = ARMISD::VMULLs; - } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) && - (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) { + else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG)) NewOpc = ARMISD::VMULLu; - } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) { + else if (VT == MVT::v2i64) // Fall through to expand this. It is not legal. return SDValue(); - } else { + else // Other vector multiplications are legal. return Op; - } // Legalize to a VMULL instruction. DebugLoc DL = Op.getDebugLoc(); @@ -3671,6 +4057,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: @@ -3681,9 +4068,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); - case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); @@ -3712,12 +4100,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, default: llvm_unreachable("Don't know how to custom expand this!"); break; - case ISD::BIT_CONVERT: - Res = ExpandBIT_CONVERT(N, DAG); + case ISD::BITCAST: + Res = ExpandBITCAST(N, DAG); break; case ISD::SRL: case ISD::SRA: - Res = LowerShift(N, DAG, Subtarget); + Res = Expand64BitShift(N, DAG, Subtarget); break; } if (Res.getNode()) @@ -4196,10 +4584,6 @@ static SDValue PerformMULCombine(SDNode *N, if (Subtarget->isThumb1Only()) return SDValue(); - if (DAG.getMachineFunction(). 
- getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -4244,10 +4628,67 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Attempt to use immediate-form VBIC + BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VbicVT; + SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VbicVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); + SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); + } + } + } + + return SDValue(); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + // Attempt to use immediate-form VORR + BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && Subtarget->hasNEON() && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VorrVT; + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VorrVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + } + } + } + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. @@ -4255,7 +4696,6 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); // 1) or (and A, mask), val => ARMbfi A, val, mask @@ -4270,7 +4710,6 @@ static SDValue PerformORCombine(SDNode *N, if (N0.getOpcode() != ISD::AND) return SDValue(); - EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); @@ -4343,6 +4782,35 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); } +/// PerformVMOVRRDCombine - Target-specific dag combine xforms for +/// ARMISD::VMOVRRD. +static SDValue PerformVMOVRRDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // vmovrrd(vmovdrr x, y) -> x,y + SDValue InDouble = N->getOperand(0); + if (InDouble.getOpcode() == ARMISD::VMOVDRR) + return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + return SDValue(); +} + +/// PerformVMOVDRRCombine - Target-specific dag combine xforms for +/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 
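+/// Rebuilding a double-word value from the two GPR halves that VMOVRRD just
+/// produced is a no-op, so the pair collapses to a bitcast of the original
+/// value.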
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { + // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::BITCAST) + Op1 = Op1.getOperand(0); + if (Op0.getOpcode() == ARMISD::VMOVRRD && + Op0.getNode() == Op1.getNode() && + Op0.getResNo() == 0 && Op1.getResNo() == 1) + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + N->getValueType(0), Op0.getOperand(0)); + return SDValue(); +} + /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) { @@ -4350,46 +4818,160 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) { // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. - if (N->getNumOperands() == 2) { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - if (Op0.getOpcode() == ISD::BIT_CONVERT) - Op0 = Op0.getOperand(0); - if (Op1.getOpcode() == ISD::BIT_CONVERT) - Op1 = Op1.getOperand(0); - if (Op0.getOpcode() == ARMISD::VMOVRRD && - Op0.getNode() == Op1.getNode() && - Op0.getResNo() == 0 && Op1.getResNo() == 1) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), - N->getValueType(0), Op0.getOperand(0)); - } - } + if (N->getNumOperands() == 2) + return PerformVMOVDRRCombine(N, DAG); return SDValue(); } -/// PerformVMOVRRDCombine - Target-specific dag combine xforms for -/// ARMISD::VMOVRRD. -static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // fmrrd(fmdrr x, y) -> x,y - SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR) - return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); - return SDValue(); +/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for +/// ISD::VECTOR_SHUFFLE. +static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { + // The LLVM shufflevector instruction does not require the shuffle mask + // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does + // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the + // operands do not match the mask length, they are extended by concatenating + // them with undef vectors. That is probably the right thing for other + // targets, but for NEON it is better to concatenate two double-register + // size vector operands into a single quad-register size vector. Do that + // transformation here: + // shuffle(concat(v1, undef), concat(v2, undef)) -> + // shuffle(concat(v1, v2), undef) + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() != ISD::CONCAT_VECTORS || + Op1.getOpcode() != ISD::CONCAT_VECTORS || + Op0.getNumOperands() != 2 || + Op1.getNumOperands() != 2) + return SDValue(); + SDValue Concat0Op1 = Op0.getOperand(1); + SDValue Concat1Op1 = Op1.getOperand(1); + if (Concat0Op1.getOpcode() != ISD::UNDEF || + Concat1Op1.getOpcode() != ISD::UNDEF) + return SDValue(); + // Skip the transformation if any of the types are illegal. 
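+  // The combined CONCAT_VECTORS type must be directly usable as a
+  // quad-register NEON type; an illegal type here would only be split
+  // apart again by the legalizer.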
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT VT = N->getValueType(0);
+  if (!TLI.isTypeLegal(VT) ||
+      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+      !TLI.isTypeLegal(Concat1Op1.getValueType()))
+    return SDValue();
+
+  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                                  Op0.getOperand(0), Op1.getOperand(0));
+  // Translate the shuffle mask.
+  SmallVector<int, 8> NewMask;
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfElts = NumElts/2;
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  for (unsigned n = 0; n < NumElts; ++n) {
+    int MaskElt = SVN->getMaskElt(n);
+    int NewElt = -1;
+    if (MaskElt < (int)HalfElts)
+      NewElt = MaskElt;
+    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+      NewElt = HalfElts + MaskElt - NumElts;
+    NewMask.push_back(NewElt);
+  }
+  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+                              DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  // vldN-dup instructions only support 64-bit vectors for N > 1.
+  if (!VT.is64BitVector())
+    return false;
+
+  // Check if the VDUPLANE operand is a vldN-lane intrinsic.
+  SDNode *VLD = N->getOperand(0).getNode();
+  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return false;
+  unsigned NumVecs = 0;
+  unsigned NewOpc = 0;
+  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+  if (IntNo == Intrinsic::arm_neon_vld2lane) {
+    NumVecs = 2;
+    NewOpc = ARMISD::VLD2DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+    NumVecs = 3;
+    NewOpc = ARMISD::VLD3DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+    NumVecs = 4;
+    NewOpc = ARMISD::VLD4DUP;
+  } else {
+    return false;
+  }
+
+  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+  // numbers match the load.
+  unsigned VLDLaneNo =
+    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    // Ignore uses of the chain result.
+    if (UI.getUse().getResNo() == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    if (User->getOpcode() != ARMISD::VDUPLANE ||
+        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+      return false;
+  }
+
+  // Create the vldN-dup node.
+  EVT Tys[5];
+  unsigned n;
+  for (n = 0; n < NumVecs; ++n)
+    Tys[n] = VT;
+  Tys[n] = MVT::Other;
+  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+                                           Ops, 2, VLDMemInt->getMemoryVT(),
+                                           VLDMemInt->getMemOperand());
+
+  // Update the uses.
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    unsigned ResNo = UI.getUse().getResNo();
+    // Ignore uses of the chain result.
+    if (ResNo == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+  }
+
+  // Now the vldN-lane intrinsic is dead except for its chain result.
+  // Update uses of the chain.
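+  // Hand DAGCombiner the complete list of replacement values (the NumVecs
+  // vector results followed by the chain) so the original intrinsic node
+  // can be removed.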
+  std::vector<SDValue> VLDDupResults;
+  for (unsigned n = 0; n < NumVecs; ++n)
+    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+  DCI.CombineTo(VLD, VLDDupResults);
+
+  return true;
 }
 
 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
 /// ARMISD::VDUPLANE.
 static SDValue PerformVDUPLANECombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
-  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
-  // redundant.
   SDValue Op = N->getOperand(0);
-  EVT VT = N->getValueType(0);
-
-  // Ignore bit_converts.
-  while (Op.getOpcode() == ISD::BIT_CONVERT)
+
+  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+  if (CombineVLDDUP(N, DCI))
+    return SDValue(N, 0);
+
+  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+  // redundant.  Ignore bit_converts for now; element sizes are checked below.
+  while (Op.getOpcode() == ISD::BITCAST)
     Op = Op.getOperand(0);
   if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
     return SDValue();
@@ -4401,11 +4983,11 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
   unsigned EltBits;
   if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
     EltSize = 8;
+  EVT VT = N->getValueType(0);
   if (EltSize > VT.getVectorElementType().getSizeInBits())
     return SDValue();
 
-  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
-  return DCI.CombineTo(N, Res, false);
+  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
 }
 
 /// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -4413,7 +4995,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
 /// build_vector must have the same constant integer value.
 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
   // Ignore bit_converts.
-  while (Op.getOpcode() == ISD::BIT_CONVERT)
+  while (Op.getOpcode() == ISD::BITCAST)
     Op = Op.getOperand(0);
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   APInt SplatBits, SplatUndef;
@@ -4627,7 +5209,8 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
 
   // Nothing to be done for scalar shifts.
-  if (!
VT.isVector()) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); assert(ST->hasNEON() && "unexpected vector shift"); @@ -4673,7 +5256,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, if (VT == MVT::i32 && (EltVT == MVT::i8 || EltVT == MVT::i16) && - TLI.isTypeLegal(Vec.getValueType())) { + TLI.isTypeLegal(Vec.getValueType()) && + isa(Lane)) { unsigned Opc = 0; switch (N->getOpcode()) { @@ -4786,8 +5370,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG); + case ISD::AND: return PerformANDCombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG); + case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -4802,15 +5389,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, } bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { - if (!Subtarget->hasV6Ops()) - // Pre-v6 does not support unaligned mem access. - return false; - - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - // FIXME: This is pretty conservative. Should we provide cmdline option to - // control the behaviour? - if (!Subtarget->isTargetDarwin()) + if (!Subtarget->allowsUnalignedMem()) return false; switch (VT.getSimpleVT().SimpleTy) { @@ -5243,6 +5822,40 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +ARMTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'l': + if (type->isIntegerTy()) { + if (Subtarget->isThumb()) + weight = CW_SpecificReg; + else + weight = CW_Register; + } + break; + case 'w': + if (type->isFloatingPointTy()) + weight = CW_Register; + break; + } + return weight; +} + std::pair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -5545,3 +6158,63 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return ARM::getVFPf64Imm(Imm) != -1; return false; } + +/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as +/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment +/// specified in the intrinsic calls. 
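+/// Returning true here lets the DAG builder attach a MachineMemOperand to the
+/// intrinsic node, so later passes see these NEON accesses as ordinary loads
+/// and stores.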
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                           const CallInst &I,
+                                           unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    // Conservatively set memVT to the entire set of vectors loaded.
+    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.vol = false; // volatile loads with NEON intrinsics not supported
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    // Conservatively set memVT to the entire set of vectors stored.
+    unsigned NumElts = 0;
+    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+      if (!ArgTy->isVectorTy())
+        break;
+      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+    }
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.vol = false; // volatile stores with NEON intrinsics not supported
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
+  default:
+    break;
+  }
+
+  return false;
+}