X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=b82a2209e550bc7f7d15e32ed50efa4d53a44176;hb=235e2f6a68b5f37d6c1b554330eebc8d32f1aca9;hp=4922f7d12e73ea3b32c3bbf6a4b09fe8a68fbdcf;hpb=861986401e05e437cb33bfd8320d510b956fe41e;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4922f7d12e7..b82a2209e55 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -17,6 +17,7 @@ #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" +#include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -39,67 +40,87 @@ #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include using namespace llvm; -static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, - MVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, + EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); - setOperationAction(ISD::STORE, VT, Promote); - AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); } - MVT ElemTy = VT.getVectorElementType(); + EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::VSETCC, VT, Custom); + setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); if (ElemTy == MVT::i8 || ElemTy == MVT::i16) - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + if (ElemTy != MVT::i32) { + setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); + } + setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); + 
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); if (VT.isInteger()) { - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); - } -} - -void ARMTargetLowering::addDRTypeForNEON(MVT VT) { + setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::AND, VT.getSimpleVT(), + PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::OR, VT.getSimpleVT(), + PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::XOR, VT.getSimpleVT(), + PromotedBitwiseVT.getSimpleVT()); + } + + // Neon does not support vector divide/remainder operations. + setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); +} + +void ARMTargetLowering::addDRTypeForNEON(EVT VT) { addRegisterClass(VT, ARM::DPRRegisterClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } -void ARMTargetLowering::addQRTypeForNEON(MVT VT) { +void ARMTargetLowering::addQRTypeForNEON(EVT VT) { addRegisterClass(VT, ARM::QPRRegisterClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } @@ -196,6 +217,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); + // Libcalls should use the AAPCS base standard ABI, even if hard float + // is in effect, as per the ARM RTABI specification, section 4.1.2. + if (Subtarget->isAAPCS_ABI()) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + setLibcallCallingConv(static_cast(i), + CallingConv::ARM_AAPCS); + } + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else @@ -221,6 +251,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); + // v2f64 is legal so that QR subregs can be extracted as f64 elements, but + // neither Neon nor VFP support any arithmetic operations on it. 
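// ---- Editor's illustrative sketch (not part of this patch) ----
// The AAPCS libcall hunk above lost its template argument to HTML extraction;
// in the LLVM sources of this era the call presumably reads
//   setLibcallCallingConv(static_cast<RTLIB::Libcall>(i), CallingConv::ARM_AAPCS);
// forcing every runtime-library call onto the AAPCS base ABI as RTABI 4.1.2
// requires. The iterate-over-an-enum-and-cast pattern in standalone form,
// with a hypothetical enum standing in for RTLIB::Libcall:
#include <cstdio>

enum Libcall { SDIV_I32, UDIV_I32, MEMCPY, UNKNOWN_LIBCALL };

int main() {
  for (int i = 0; i < UNKNOWN_LIBCALL; ++i) {
    // An int does not convert implicitly to an enum, hence the explicit cast.
    Libcall LC = static_cast<Libcall>(i);
    std::printf("libcall %d -> AAPCS calling convention\n", static_cast<int>(LC));
  }
  return 0;
}
// ---- end of sketch ----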
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + setOperationAction(ISD::FDIV, MVT::v2f64, Expand); + setOperationAction(ISD::FREM, MVT::v2f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); + setOperationAction(ISD::FNEG, MVT::v2f64, Expand); + setOperationAction(ISD::FABS, MVT::v2f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); + setOperationAction(ISD::FSIN, MVT::v2f64, Expand); + setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); + setOperationAction(ISD::FPOW, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); + setOperationAction(ISD::FEXP, MVT::v2f64, Expand); + setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); + setOperationAction(ISD::FRINT, MVT::v2f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + // Neon does not support some operations on v1i64 and v2i64 types. + setOperationAction(ISD::MUL, MVT::v1i64, Expand); + setOperationAction(ISD::MUL, MVT::v2i64, Expand); + setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); + setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); @@ -307,6 +370,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // FIXME: Shouldn't need this, since no register is used, but the legalizer + // doesn't yet know how to not do that for SjLj. + setExceptionSelectorRegister(ARM::R0); if (Subtarget->isThumb()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else @@ -325,8 +392,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -373,26 +438,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(ARM::SP); setSchedulingPreference(SchedulingForRegPressure); - setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); - setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); - - if (!Subtarget->isThumb()) { - // Use branch latency information to determine if-conversion limits. - // FIXME: If-converter should use instruction latency of the branch being - // eliminated to compute the threshold. For ARMv6, the branch "latency" - // varies depending on whether it's dynamically or statically predicted - // and on whether the destination is in the prefetch buffer. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData(); - unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass()); - if (Latency > 1) { - setIfCvtBlockSizeLimit(Latency-1); - if (Latency > 2) - setIfCvtDupBlockSizeLimit(Latency-2); - } else { - setIfCvtBlockSizeLimit(10); - setIfCvtDupBlockSizeLimit(2); - } + + // FIXME: If-converter should use instruction latency to determine + // profitability rather than relying on fixed limits. + if (Subtarget->getCPUString() == "generic") { + // Generic (and overly aggressive) if-conversion limits. + setIfCvtBlockSizeLimit(10); + setIfCvtDupBlockSizeLimit(2); + } else if (Subtarget->hasV6Ops()) { + setIfCvtBlockSizeLimit(2); + setIfCvtDupBlockSizeLimit(1); + } else { + setIfCvtBlockSizeLimit(3); + setIfCvtDupBlockSizeLimit(2); } maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type @@ -468,19 +526,21 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; - case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ"; - case ARMISD::VLD2D: return "ARMISD::VLD2D"; - case ARMISD::VLD3D: return "ARMISD::VLD3D"; - case ARMISD::VLD4D: return "ARMISD::VLD4D"; - case ARMISD::VST2D: return "ARMISD::VST2D"; - case ARMISD::VST3D: return "ARMISD::VST3D"; - case ARMISD::VST4D: return "ARMISD::VST4D"; + case ARMISD::VDUP: return "ARMISD::VDUP"; + case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; + case ARMISD::VEXT: return "ARMISD::VEXT"; + case ARMISD::VREV64: return "ARMISD::VREV64"; + case ARMISD::VREV32: return "ARMISD::VREV32"; + case ARMISD::VREV16: return "ARMISD::VREV16"; + case ARMISD::VZIP: return "ARMISD::VZIP"; + case ARMISD::VUZP: return "ARMISD::VUZP"; + case ARMISD::VTRN: return "ARMISD::VTRN"; } } /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { - return getTargetMachine().getSubtarget().isThumb() ? 1 : 2; + return getTargetMachine().getSubtarget().isThumb() ? 0 : 1; } //===----------------------------------------------------------------------===// @@ -504,12 +564,9 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { } } -/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It -/// returns true if the operands should be inverted to form the proper -/// comparison. -static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2) { - bool Invert = false; CondCode2 = ARMCC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); @@ -520,7 +577,7 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; - case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break; + case ISD::SETOLE: CondCode = ARMCC::LS; break; case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; @@ -534,7 +591,6 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETNE: case ISD::SETUNE: CondCode = ARMCC::NE; break; } - return Invert; } //===----------------------------------------------------------------------===// @@ -544,7 +600,7 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" // APCS f64 is in register pairs, possibly split to stack -static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -574,7 +630,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } -static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -587,7 +643,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } // AAPCS f64 is in aligned register pairs -static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; @@ -617,7 +673,7 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } -static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -629,7 +685,7 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; // we handled it } -static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; @@ -649,7 +705,7 @@ static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } -static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -660,7 +716,7 @@ static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; // we handled it } -static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, 
ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -670,7 +726,7 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, /// CCAssignFnForNode - Selects the correct CCAssignFn for a the /// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const { switch (CC) { @@ -700,7 +756,7 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, /// appropriate copies out of appropriate physical registers. SDValue ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { @@ -831,7 +887,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, /// nodes. SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, @@ -954,8 +1010,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, - ARMCP::CPStub, 4); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -973,8 +1030,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // tBX takes a register source operand. const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex, - ARMCP::CPStub, 4); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -1034,7 +1091,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue ARMTargetLowering::LowerReturn(SDValue Chain, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, DebugLoc dl, SelectionDAG &DAG) { @@ -1129,7 +1186,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here DebugLoc dl = Op.getDebugLoc(); ConstantPoolSDNode *CP = cast(Op); @@ -1148,11 +1205,11 @@ SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) { DebugLoc dl = GA->getDebugLoc(); - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, - PCAdj, "tlsgd", true); + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, "tlsgd", true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); @@ -1165,11 +1222,12 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::Int32Ty; + Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? std::pair CallResult = - LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, + LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; @@ -1184,7 +1242,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, DebugLoc dl = GA->getDebugLoc(); SDValue Offset; SDValue Chain = DAG.getEntryNode(); - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -1192,8 +1250,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, // initial exec model unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, - PCAdj, "gottpoff", true); + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, "gottpoff", true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); @@ -1205,8 +1263,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); } else { // local exec model - ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); @@ -1233,59 +1290,47 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT"); + new ARMConstantPoolValue(GV, UseGOTOFF ? 
"GOTOFF" : "GOT"); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - CPAddr, NULL, 0); + CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, + PseudoSourceValue::getGOT(), 0); return Result; } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); } } -/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol -/// even in non-static mode. -static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. - bool isDecl = GV->isDeclaration() || GV->hasAvailableExternallyLinkage(); - if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) - return false; - return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); -} - SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); SDValue CPAddr; if (RelocM == Reloc::Static) CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); else { - unsigned PCAdj = (RelocM != Reloc::PIC_) - ? 0 : (Subtarget->isThumb() ? 4 : 8); - ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr - : ARMCP::CPValue; - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, - Kind, PCAdj); + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1297,7 +1342,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } - if (IsIndirect) + + if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); return Result; @@ -1307,76 +1353,20 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG){ assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; - ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_", - ARMPCLabelIndex, - ARMCP::CPValue, PCAdj); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + "_GLOBAL_OFFSET_TABLE_", + ARMPCLabelIndex, PCAdj); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } -static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG, - unsigned Opcode) { - SDNode *Node = Op.getNode(); - MVT VT = Node->getValueType(0); - DebugLoc dl = Op.getDebugLoc(); - - if (!VT.is64BitVector()) - return SDValue(); // unimplemented - - SDValue Ops[] = { Node->getOperand(0), - Node->getOperand(2) }; - return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2); -} - -static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG, - unsigned Opcode, unsigned NumVecs) { - SDNode *Node = Op.getNode(); - MVT VT = Node->getOperand(3).getValueType(); - DebugLoc dl = Op.getDebugLoc(); - - if (!VT.is64BitVector()) - return SDValue(); // unimplemented - - SmallVector Ops; - Ops.push_back(Node->getOperand(0)); - Ops.push_back(Node->getOperand(2)); - for (unsigned N = 0; N < NumVecs; ++N) - Ops.push_back(Node->getOperand(N + 3)); - return DAG.getNode(Opcode, dl, MVT::Other, Ops.data(), Ops.size()); -} - -SDValue -ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { - unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); - switch (IntNo) { - case Intrinsic::arm_neon_vld2i: - case Intrinsic::arm_neon_vld2f: - return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D); - case Intrinsic::arm_neon_vld3i: - case Intrinsic::arm_neon_vld3f: - return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D); - case Intrinsic::arm_neon_vld4i: - case Intrinsic::arm_neon_vld4f: - return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D); - case Intrinsic::arm_neon_vst2i: - case Intrinsic::arm_neon_vst2f: - return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2); - case Intrinsic::arm_neon_vst3i: - case Intrinsic::arm_neon_vst3f: - return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3); - case Intrinsic::arm_neon_vst4i: - case Intrinsic::arm_neon_vst4f: - return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4); - default: return SDValue(); // Don't custom lower most intrinsics. - } -} - SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); @@ -1384,9 +1374,32 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_thread_pointer: { - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } + case Intrinsic::eh_sjlj_lsda: { + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + SDValue CPAddr; + unsigned PCAdj = (RelocM != Reloc::PIC_) + ? 0 : (Subtarget->isThumb() ? 
4 : 8); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, + ARMCP::CPLSDA, PCAdj); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + + if (RelocM == Reloc::PIC_) { + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } + return Result; + } case Intrinsic::eh_sjlj_setjmp: return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1)); } @@ -1397,7 +1410,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. DebugLoc dl = Op.getDebugLoc(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); @@ -1407,7 +1420,7 @@ SDValue ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDValue Align = Op.getOperand(2); @@ -1486,7 +1499,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ARMTargetLowering::LowerFormalArguments(SDValue Chain, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, @@ -1512,7 +1525,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Arguments stored in registers. if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); + EVT RegVT = VA.getLocVT(); SDValue ArgValue; if (VA.needsCustom()) { @@ -1734,7 +1747,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); @@ -1750,8 +1763,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, } ARMCC::CondCodes CondCode, CondCode2; - if (FPCCToARMCC(CC, CondCode, CondCode2)) - std::swap(TrueVal, FalseVal); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); @@ -1787,9 +1799,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); ARMCC::CondCodes CondCode, CondCode2; - if (FPCCToARMCC(CC, CondCode, CondCode2)) - // Swap the LHS/RHS of the comparison if needed. 
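// ---- Editor's illustrative sketch (not part of this patch) ----
// The LowerSELECT_CC/LowerBR_CC hunks here drop the operand-swapping return
// value of FPCCToARMCC because SETOLE is now mapped directly to ARMCC::LS
// (C clear or Z set). Assuming the usual ARM VFP compare flag encodings
// (less = N, equal = Z+C, greater = C, unordered = C+V), this standalone
// check shows LS holds exactly for the ordered "less than or equal" results,
// so no swap is needed:
#include <cassert>

struct Flags { bool N, Z, C, V; };

int main() {
  const Flags Less      = {true,  false, false, false};
  const Flags Equal     = {false, true,  true,  false};
  const Flags Greater   = {false, false, true,  false};
  const Flags Unordered = {false, false, true,  true};

  auto LS = [](Flags F) { return !F.C || F.Z; }; // ARM "lower or same"

  assert(LS(Less) && LS(Equal));           // a <= b (ordered): LS is true
  assert(!LS(Greater) && !LS(Unordered));  // otherwise LS is false
  return 0;
}
// ---- end of sketch ----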
- std::swap(LHS, RHS); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); @@ -1811,7 +1821,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { SDValue Index = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - MVT PTy = getPointerTy(); + EVT PTy = getPointerTy(); JumpTableSDNode *JT = cast(Table); ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo(); SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); @@ -1828,7 +1838,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { Addr, Op.getOperand(2), JTI, UId); } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - Addr = DAG.getLoad((MVT)MVT::i32, dl, Chain, Addr, NULL, 0); + Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, NULL, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); @@ -1848,7 +1858,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Opc = Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; @@ -1862,8 +1872,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT SrcVT = Tmp1.getValueType(); + EVT VT = Op.getValueType(); + EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); @@ -1874,7 +1884,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) @@ -1909,7 +1919,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, unsigned BytesLeft = SizeVal & 3; unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; - MVT VT = MVT::i32; + EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; const unsigned MAX_LOADS_IN_LDM = 6; @@ -2015,45 +2025,55 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); // Zero vectors are used to represent vector negation and in those cases // will be implemented with the NEON VNEG instruction. However, VNEG does // not support i64 elements, so sometimes the zero vectors will need to be // explicitly constructed. For those cases, and potentially other uses in - // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted // to their dest type. This ensures they get CSE'd. 
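// ---- Editor's illustrative sketch (not part of this patch) ----
// EmitTargetCodeForMemcpy above copies SizeVal bytes as 32-bit words issued in
// groups of at most six (so ldm/stm can be formed) and then handles the 0-3
// trailing bytes separately. The batching arithmetic in standalone form:
#include <algorithm>
#include <cstdio>

int main() {
  const unsigned MaxLoadsInLDM = 6;
  unsigned SizeVal = 59;              // example copy size in bytes
  unsigned NumMemOps = SizeVal >> 2;  // 14 word-sized memory ops
  unsigned BytesLeft = SizeVal & 3;   // 3 trailing bytes

  for (unsigned Emitted = 0; Emitted < NumMemOps;) {
    unsigned Batch = std::min(MaxLoadsInLDM, NumMemOps - Emitted);
    std::printf("emit a block of %u word loads/stores\n", Batch); // 6, 6, 2
    Emitted += Batch;
  }
  std::printf("then copy the %u remaining byte(s)\n", BytesLeft);
  return 0;
}
// ---- end of sketch ----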
SDValue Vec; - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - if (VT.getSizeInBits() == 64) - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue Cst = DAG.getTargetConstant(0, MVT::i8); + SmallVector Ops; + MVT TVT; + + if (VT.getSizeInBits() == 64) { + Ops.assign(8, Cst); TVT = MVT::v8i8; + } else { + Ops.assign(16, Cst); TVT = MVT::v16i8; + } + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } /// getOnesVector - Returns a vector of specified type with all bits set. /// -static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest - // type. This ensures they get CSE'd. + // Always build ones vectors as <16 x i32> or <8 x i32> bitcasted to their + // dest type. This ensures they get CSE'd. SDValue Vec; - SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); - if (VT.getSizeInBits() == 64) - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); + SmallVector Ops; + MVT TVT; + + if (VT.getSizeInBits() == 64) { + Ops.assign(8, Cst); TVT = MVT::v8i8; + } else { + Ops.assign(16, Cst); TVT = MVT::v16i8; + } + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Lower vector shifts on NEON to use VSHL. @@ -2072,7 +2092,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, // NEON uses the same intrinsics for both left and right shifts. For // right shifts, the shift amounts are negative, so negate the vector of // shift amounts. - MVT ShiftVT = N->getOperand(1).getValueType(); + EVT ShiftVT = N->getOperand(1).getValueType(); SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1)); @@ -2084,8 +2104,11 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, N->getOperand(0), NegatedCount); } - assert(VT == MVT::i64 && - (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + // We can get here for a node like i32 = ISD::SHL i32, i64 + if (VT != MVT::i64) + return SDValue(); + + assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. @@ -2123,7 +2146,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); DebugLoc dl = Op.getDebugLoc(); @@ -2299,23 +2322,53 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SplatBitSize, DAG); } +static bool isVEXTMask(const SmallVectorImpl &M, EVT VT, + bool &ReverseVEXT, unsigned &Imm) { + unsigned NumElts = VT.getVectorNumElements(); + ReverseVEXT = false; + Imm = M[0]; + + // If this is a VEXT shuffle, the immediate value is the index of the first + // element. 
The other shuffle indices must be the successive elements after + // the first one. + unsigned ExpectedElt = Imm; + for (unsigned i = 1; i < NumElts; ++i) { + // Increment the expected index. If it wraps around, it may still be + // a VEXT but the source vectors must be swapped. + ExpectedElt += 1; + if (ExpectedElt == NumElts * 2) { + ExpectedElt = 0; + ReverseVEXT = true; + } + + if (ExpectedElt != static_cast(M[i])) + return false; + } + + // Adjust the index value if the source operands will be swapped. + if (ReverseVEXT) + Imm -= NumElts; + + return true; +} + /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) -bool ARM::isVREVMask(ShuffleVectorSDNode *N, unsigned BlockSize) { +static bool isVREVMask(const SmallVectorImpl &M, EVT VT, + unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); - MVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); unsigned EltSz = VT.getVectorElementType().getSizeInBits(); - unsigned BlockElts = N->getMaskElt(0) + 1; + unsigned BlockElts = M[0] + 1; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned) N->getMaskElt(i) != + if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } @@ -2323,15 +2376,62 @@ bool ARM::isVREVMask(ShuffleVectorSDNode *N, unsigned BlockSize) { return true; } -static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static bool isVTRNMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((unsigned) M[i] != i + WhichResult || + (unsigned) M[i+1] != i + NumElts + WhichResult) + return false; + } + return true; +} + +static bool isVUZPMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + if ((unsigned) M[i] != 2 * i + WhichResult) + return false; + } + + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + return false; + + return true; +} + +static bool isVZIPMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((unsigned) M[i] != Idx || + (unsigned) M[i+1] != Idx + NumElts) + return false; + Idx += 1; + } + + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + return false; + + return true; +} + +static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Canonicalize all-zeros and all-ones vectors. 
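// ---- Editor's illustrative sketch (not part of this patch) ----
// Concrete index patterns accepted by the shuffle-mask predicates above, for a
// 4 x i16 shuffle (elements 0-3 come from the first operand, 4-7 from the
// second):
//   VREV64.16  <3,2,1,0>                 VEXT #1    <1,2,3,4>
//   VTRN.16    <0,4,2,6> and <1,5,3,7>
//   VUZP.16    <0,2,4,6> and <1,3,5,7>
//   VZIP.16    <0,4,1,5> and <2,6,3,7>
// A standalone copy of the isVTRNMask test, exercised on both results:
#include <cassert>
#include <vector>

static bool isVTRNMask(const std::vector<int> &M, unsigned NumElts,
                       unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2)
    if ((unsigned)M[i] != i + WhichResult ||
        (unsigned)M[i + 1] != i + NumElts + WhichResult)
      return false;
  return true;
}

int main() {
  const std::vector<int> Trn0 = {0, 4, 2, 6}, Trn1 = {1, 5, 3, 7};
  const std::vector<int> Uzp0 = {0, 2, 4, 6};
  unsigned Which;
  assert(isVTRNMask(Trn0, 4, Which) && Which == 0);
  assert(isVTRNMask(Trn1, 4, Which) && Which == 1);
  assert(!isVTRNMask(Uzp0, 4, Which)); // a VUZP mask, not a VTRN mask
  return 0;
}
// ---- end of sketch ----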
- ConstantSDNode *ConstVal = dyn_cast(Val.getNode()); + ConstantSDNode *ConstVal = cast(Val.getNode()); if (ConstVal->isNullValue()) return getZeroVector(VT, DAG, dl); if (ConstVal->isAllOnesValue()) return getOnesVector(VT, DAG, dl); - MVT CanonicalVT; + EVT CanonicalVT; if (VT.is64BitVector()) { switch (Val.getValueType().getSizeInBits()) { case 8: CanonicalVT = MVT::v8i8; break; @@ -2362,19 +2462,20 @@ static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { // If this is a case we can't handle, return null and let the default // expansion code take care of it. static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); - assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + BuildVectorSDNode *BVN = cast(Op.getNode()); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, DAG); - if (Val.getNode()) - return BuildSplat(Val, VT, DAG, dl); + if (SplatBitSize <= 64) { + SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, DAG); + if (Val.getNode()) + return BuildSplat(Val, VT, DAG, dl); + } } // If there are only 2 elements in a 128-bit vector, insert them into an @@ -2397,24 +2498,207 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - return Op; +/// isShuffleMaskLegal - Targets can use this to indicate that they only +/// support *some* VECTOR_SHUFFLE operations, those with specific masks. +/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values +/// are assumed to be legal. +bool +ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + EVT VT) const { + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (M[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = M[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return true; + } + + bool ReverseVEXT; + unsigned Imm, WhichResult; + + return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + isVREVMask(M, VT, 64) || + isVREVMask(M, VT, 32) || + isVREVMask(M, VT, 16) || + isVEXTMask(M, VT, ReverseVEXT, Imm) || + isVTRNMask(M, VT, WhichResult) || + isVUZPMask(M, VT, WhichResult) || + isVZIPMask(M, VT, WhichResult)); +} + +/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit +/// the specified operations to build the shuffle. 
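// ---- Editor's illustrative sketch (not part of this patch) ----
// The perfect-shuffle table (ARMPerfectShuffle.h) is indexed by packing the
// four mask entries in base 9, with 8 standing for an undef lane, exactly as
// isShuffleMaskLegal above computes PFTableIndex. Each 32-bit entry packs a
// cost, an opcode, and two operand shuffle IDs in the layout decoded by
// GeneratePerfectShuffle below. A standalone encoder/decoder:
#include <cstdint>
#include <cstdio>

static unsigned encodeMask(unsigned m0, unsigned m1, unsigned m2, unsigned m3) {
  return ((m0 * 9 + m1) * 9 + m2) * 9 + m3; // 9 possible values per lane
}

struct PFEntryFields { unsigned Cost, OpNum, LHSID, RHSID; };

static PFEntryFields decodeEntry(uint32_t PFEntry) {
  PFEntryFields F;
  F.Cost  = PFEntry >> 30;                       // bits 31..30
  F.OpNum = (PFEntry >> 26) & 0x0F;              // bits 29..26
  F.LHSID = (PFEntry >> 13) & ((1u << 13) - 1);  // bits 25..13
  F.RHSID = PFEntry & ((1u << 13) - 1);          // bits 12..0
  return F;
}

int main() {
  // These match the (1*9+2)*9+3 and ((4*9+5)*9+6)*9+7 "identity copy"
  // constants tested in GeneratePerfectShuffle: 102 and 3382.
  std::printf("LHS copy id = %u, RHS copy id = %u\n",
              encodeMask(0, 1, 2, 3), encodeMask(4, 5, 6, 7));
  PFEntryFields F = decodeEntry(0x40000000u); // hypothetical entry, cost 1
  std::printf("cost=%u op=%u lhs=%u rhs=%u\n", F.Cost, F.OpNum, F.LHSID, F.RHSID);
  return 0;
}
// ---- end of sketch ----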
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + unsigned OpNum = (PFEntry >> 26) & 0x0F; + unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); + unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); + + enum { + OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> + OP_VREV, + OP_VDUP0, + OP_VDUP1, + OP_VDUP2, + OP_VDUP3, + OP_VEXT1, + OP_VEXT2, + OP_VEXT3, + OP_VUZPL, // VUZP, left result + OP_VUZPR, // VUZP, right result + OP_VZIPL, // VZIP, left result + OP_VZIPR, // VZIP, right result + OP_VTRNL, // VTRN, left result + OP_VTRNR // VTRN, right result + }; + + if (OpNum == OP_COPY) { + if (LHSID == (1*9+2)*9+3) return LHS; + assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); + return RHS; + } + + SDValue OpLHS, OpRHS; + OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + EVT VT = OpLHS.getValueType(); + + switch (OpNum) { + default: llvm_unreachable("Unknown shuffle opcode!"); + case OP_VREV: + return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); + case OP_VDUP0: + case OP_VDUP1: + case OP_VDUP2: + case OP_VDUP3: + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, + OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); + case OP_VEXT1: + case OP_VEXT2: + case OP_VEXT3: + return DAG.getNode(ARMISD::VEXT, dl, VT, + OpLHS, OpRHS, + DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); + case OP_VUZPL: + case OP_VUZPR: + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); + case OP_VZIPL: + case OP_VZIPR: + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); + case OP_VTRNL: + case OP_VTRNR: + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); + } } -static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { - return Op; +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + SmallVector ShuffleMask; + + // Convert shuffles that are directly supported on NEON to target-specific + // DAG nodes, instead of keeping them as shuffles and matching them again + // during code selection. This is more efficient and avoids the possibility + // of inconsistencies between legalization and selection. + // FIXME: floating-point vectors should be canonicalized to integer vectors + // of the same time so that they get CSEd properly. 
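// ---- Editor's illustrative sketch (not part of this patch) ----
// The splat handling just below turns a shuffle whose mask repeats a single
// index (e.g. <2,2,2,2>) into ARMISD::VDUPLANE of that lane, and, when lane 0
// of a SCALAR_TO_VECTOR is splatted, into ARMISD::VDUP of the scalar itself.
// A minimal version of the splat test (undef lanes are encoded as -1):
#include <cassert>
#include <vector>

// Returns the splatted lane, or -1 if the mask is not a splat.
static int getSplatLane(const std::vector<int> &Mask) {
  int Lane = -1;
  for (int Idx : Mask) {
    if (Idx < 0)
      continue;             // undef matches any lane
    if (Lane < 0)
      Lane = Idx;
    else if (Idx != Lane)
      return -1;
  }
  return Lane;
}

int main() {
  const std::vector<int> Splat2 = {2, 2, 2, 2}, Identity = {0, 1, 2, 3};
  assert(getSplatLane(Splat2) == 2);     // lowered to VDUPLANE, lane 2
  assert(getSplatLane(Identity) == -1);  // handled by the other patterns
  return 0;
}
// ---- end of sketch ----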
+ SVN->getMask(ShuffleMask); + + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i32)); + } + + bool ReverseVEXT; + unsigned Imm; + if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { + if (ReverseVEXT) + std::swap(V1, V2); + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, + DAG.getConstant(Imm, MVT::i32)); + } + + if (isVREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(ARMISD::VREV64, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(ARMISD::VREV32, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + + // Check for Neon shuffles that modify both input vectors in place. + // If both results are used, i.e., if there are two shuffles with the same + // source operands and with masks corresponding to both results of one of + // these operations, DAG memoization will ensure that a single node is + // used for both shuffles. + unsigned WhichResult; + if (isVTRNMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVUZPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVZIPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (ShuffleMask[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = ShuffleMask[i]; + } + + // Compute the index in the perfect shuffle table. 
+ unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); + } + + return SDValue(); } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - assert((VT == MVT::i8 || VT == MVT::i16) && - "unexpected type for custom-lowering vector extract"); SDValue Vec = Op.getOperand(0); SDValue Lane = Op.getOperand(1); - Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); - Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); + assert(VT == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && + "unexpected type for custom-lowering vector extract"); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -2458,8 +2742,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); case ISD::SHL: @@ -2468,7 +2750,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); } @@ -2503,13 +2784,14 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); - case ARM::tMOVCCr: { + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the @@ -2534,12 +2816,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - sinkMBB->addSuccessor(*i); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. 
-  while(!BB->succ_empty())
+  while (!BB->succ_empty())
     BB->removeSuccessor(BB->succ_begin());
   BB->addSuccessor(copy0MBB);
   BB->addSuccessor(sinkMBB);
@@ -2647,7 +2932,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                            TargetLowering::DAGCombinerInfo &DCI) {
   SelectionDAG &DAG = DCI.DAG;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
   unsigned Opc = N->getOpcode();
   bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
   SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
@@ -2675,7 +2960,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
              cast<ConstantSDNode>(RHS)->isNullValue()) {
     std::swap(LHS, RHS);
     SDValue Op0 = Slct.getOperand(0);
-    MVT OpVT = isSlctCC ? Op0.getValueType() :
+    EVT OpVT = isSlctCC ? Op0.getValueType() :
       Op0.getOperand(0).getValueType();
     bool isInt = OpVT.isInteger();
     CC = ISD::getSetCCInverse(CC, isInt);
@@ -2770,7 +3055,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
 /// operand of a vector shift left operation. That value must be in the range:
 ///   0 <= Value < ElementBits for a left shift; or
 ///   0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) {
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
   assert(VT.isVector() && "vector shift count is not a vector type");
   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
   if (! getVShiftImm(Op, ElementBits, Cnt))
@@ -2784,7 +3069,7 @@ static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) {
 /// absolute value must be in the range:
 ///   1 <= |Value| <= ElementBits for a right shift; or
 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, MVT VT, bool isNarrow, bool isIntrinsic,
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                          int64_t &Cnt) {
   assert(VT.isVector() && "vector shift count is not a vector type");
   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
@@ -2825,7 +3110,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   case Intrinsic::arm_neon_vqrshiftns:
   case Intrinsic::arm_neon_vqrshiftnu:
   case Intrinsic::arm_neon_vqrshiftnsu: {
-    MVT VT = N->getOperand(1).getValueType();
+    EVT VT = N->getOperand(1).getValueType();
     int64_t Cnt;
     unsigned VShiftOpc = 0;
 
@@ -2929,7 +3214,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   }
 
   case Intrinsic::arm_neon_vshiftins: {
-    MVT VT = N->getOperand(1).getValueType();
+    EVT VT = N->getOperand(1).getValueType();
     int64_t Cnt;
     unsigned VShiftOpc = 0;
 
@@ -2962,7 +3247,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 /// their values after they get legalized to loads from a constant pool.
 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
 
   // Nothing to be done for scalar shifts.
   if (! VT.isVector())
@@ -3005,8 +3290,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
     SDValue Vec = N0.getOperand(0);
     SDValue Lane = N0.getOperand(1);
-    MVT VT = N->getValueType(0);
-    MVT EltVT = N0.getValueType();
+    EVT VT = N->getValueType(0);
+    EVT EltVT = N0.getValueType();
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
     if (VT == MVT::i32 &&
@@ -3052,10 +3337,88 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   return SDValue();
 }
 
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+  if (!Subtarget->hasV6Ops())
+    // Pre-v6 does not support unaligned mem access.
+    return false;
+  else if (!Subtarget->hasV6Ops()) {
+    // v6 may or may not support unaligned mem access.
+    if (!Subtarget->isTargetDarwin())
+      return false;
+  }
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    return true;
+  // FIXME: VLD1 etc with standard alignment is legal.
+  }
+}
+
+static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
+  if (V < 0)
+    return false;
+
+  unsigned Scale = 1;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: return false;
+  case MVT::i1:
+  case MVT::i8:
+    // Scale == 1;
+    break;
+  case MVT::i16:
+    // Scale == 2;
+    Scale = 2;
+    break;
+  case MVT::i32:
+    // Scale == 4;
+    Scale = 4;
+    break;
+  }
+
+  if ((V & (Scale - 1)) != 0)
+    return false;
+  V /= Scale;
+  return V == (V & ((1LL << 5) - 1));
+}
+
+static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
+                                      const ARMSubtarget *Subtarget) {
+  bool isNeg = false;
+  if (V < 0) {
+    isNeg = true;
+    V = - V;
+  }
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: return false;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    // + imm12 or - imm8
+    if (isNeg)
+      return V == (V & ((1LL << 8) - 1));
+    return V == (V & ((1LL << 12) - 1));
+  case MVT::f32:
+  case MVT::f64:
+    // Same as ARM mode. FIXME: NEON?
+    if (!Subtarget->hasVFP2())
+      return false;
+    if ((V & 3) != 0)
+      return false;
+    V >>= 2;
+    return V == (V & ((1LL << 8) - 1));
+  }
+}
+
 /// isLegalAddressImmediate - Return true if the integer value can be used
 /// as the offset of the target addressing mode for load / store of the
 /// given type.
-static bool isLegalAddressImmediate(int64_t V, MVT VT,
+static bool isLegalAddressImmediate(int64_t V, EVT VT,
                                     const ARMSubtarget *Subtarget) {
   if (V == 0)
     return true;
@@ -3063,36 +3426,15 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT,
   if (!VT.isSimple())
     return false;
 
-  if (Subtarget->isThumb()) { // FIXME for thumb2
-    if (V < 0)
-      return false;
-
-    unsigned Scale = 1;
-    switch (VT.getSimpleVT()) {
-    default: return false;
-    case MVT::i1:
-    case MVT::i8:
-      // Scale == 1;
-      break;
-    case MVT::i16:
-      // Scale == 2;
-      Scale = 2;
-      break;
-    case MVT::i32:
-      // Scale == 4;
-      Scale = 4;
-      break;
-    }
-
-    if ((V & (Scale - 1)) != 0)
-      return false;
-    V /= Scale;
-    return V == (V & ((1LL << 5) - 1));
-  }
+  if (Subtarget->isThumb1Only())
+    return isLegalT1AddressImmediate(V, VT);
+  else if (Subtarget->isThumb2())
+    return isLegalT2AddressImmediate(V, VT, Subtarget);
 
+  // ARM mode.
   if (V < 0)
     V = - V;
-  switch (VT.getSimpleVT()) {
+  switch (VT.getSimpleVT().SimpleTy) {
   default: return false;
   case MVT::i1:
   case MVT::i8:
@@ -3104,7 +3446,7 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT,
     return V == (V & ((1LL << 8) - 1));
   case MVT::f32:
   case MVT::f64:
-    if (!Subtarget->hasVFP2())
+    if (!Subtarget->hasVFP2()) // FIXME: NEON?
      return false;
    if ((V & 3) != 0)
      return false;
@@ -3113,11 +3455,44 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT,
   }
 }
 
+bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
+                                                      EVT VT) const {
+  int Scale = AM.Scale;
+  if (Scale < 0)
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: return false;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    if (Scale == 1)
+      return true;
+    // r + r << imm
+    Scale = Scale & ~1;
+    return Scale == 2 || Scale == 4 || Scale == 8;
+  case MVT::i64:
+    // r + r
+    if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+      return true;
+    return false;
+  case MVT::isVoid:
+    // Note, we allow "void" uses (basically, uses that aren't loads or
+    // stores), because arm allows folding a scale into many arithmetic
+    // operations. This should be made more precise and revisited later.
+
+    // Allow r << imm, but the imm has to be a multiple of two.
+    if (Scale & 1) return false;
+    return isPowerOf2_32(Scale);
+  }
+}
+
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                               const Type *Ty) const {
-  MVT VT = getValueType(Ty, true);
+  EVT VT = getValueType(Ty, true);
 
   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
     return false;
@@ -3129,7 +3504,7 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   case 0:  // no scale reg, must be "r+i" or "r", or "i".
     break;
   case 1:
-    if (Subtarget->isThumb())  // FIXME for thumb2
+    if (Subtarget->isThumb1Only())
      return false;
    // FALL THROUGH.
  default:
@@ -3140,22 +3515,22 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
     if (!VT.isSimple())
       return false;
 
+    if (Subtarget->isThumb2())
+      return isLegalT2ScaledAddressingMode(AM, VT);
+
     int Scale = AM.Scale;
-    switch (VT.getSimpleVT()) {
+    switch (VT.getSimpleVT().SimpleTy) {
     default: return false;
     case MVT::i1:
     case MVT::i8:
     case MVT::i32:
-    case MVT::i64:
-      // This assumes i64 is legalized to a pair of i32. If not (i.e.
-      // ldrd / strd are used, then its address mode is same as i16.
-      // r + r
       if (Scale < 0) Scale = -Scale;
       if (Scale == 1)
         return true;
       // r + r << imm
       return isPowerOf2_32(Scale & ~1);
     case MVT::i16:
+    case MVT::i64:
       // r + r
       if (((unsigned)AM.HasBaseReg + Scale) <= 2)
         return true;
@@ -3167,15 +3542,15 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
       // operations. This should be made more precise and revisited later.
 
       // Allow r << imm, but the imm has to be a multiple of two.
-      if (AM.Scale & 1) return false;
-      return isPowerOf2_32(AM.Scale);
+      if (Scale & 1) return false;
+      return isPowerOf2_32(Scale);
     }
     break;
   }
   return true;
 }
 
-static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT,
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
                                       bool isSEXTLoad, SDValue &Base,
                                       SDValue &Offset, bool &isInc,
                                       SelectionDAG &DAG) {
@@ -3233,7 +3608,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT,
   return false;
 }
 
-static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT,
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
                                      bool isSEXTLoad, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
@@ -3269,7 +3644,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
   if (Subtarget->isThumb1Only())
     return false;
 
-  MVT VT;
+  EVT VT;
   SDValue Ptr;
   bool isSEXTLoad = false;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
@@ -3284,10 +3659,10 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
 
   bool isInc;
   bool isLegal = false;
-  if (Subtarget->isThumb() && Subtarget->hasThumb2())
+  if (Subtarget->isThumb2())
     isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                        Offset, isInc, DAG);
-  else 
+  else
     isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
                                         Offset, isInc, DAG);
   if (!isLegal)
@@ -3308,7 +3683,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   if (Subtarget->isThumb1Only())
     return false;
 
-  MVT VT;
+  EVT VT;
   SDValue Ptr;
   bool isSEXTLoad = false;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
@@ -3321,10 +3696,10 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
 
   bool isInc;
   bool isLegal = false;
-  if (Subtarget->isThumb() && Subtarget->hasThumb2())
+  if (Subtarget->isThumb2())
    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                       isInc, DAG);
-  else 
+  else
    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                        isInc, DAG);
  if (!isLegal)
@@ -3378,7 +3753,7 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
 
 std::pair<unsigned, const TargetRegisterClass*>
 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                MVT VT) const {
+                                                EVT VT) const {
   if (Constraint.size() == 1) {
     // GCC RS6000 Constraint Letters
     switch (Constraint[0]) {
@@ -3402,7 +3777,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 
 std::vector<unsigned> ARMTargetLowering::
 getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  MVT VT) const {
+                                  EVT VT) const {
   if (Constraint.size() != 1)
     return std::vector<unsigned>();
 
@@ -3594,3 +3969,9 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                       Ops, DAG);
 }
+
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // The ARM target isn't yet aware of offsets.
+  return false;
+}