X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCISelLowering.cpp;h=af9ad077a7ce16a99400644a3eac07488276edff;hb=777479f80202057f041683129d4fd9e574ffea79;hp=0765638dce2252d9bd7278e489bc6ea7b8dc9a12;hpb=7b93acde3206e916752d94288f25ae5f08a4d419;p=oota-llvm.git diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 0765638dce2..af9ad077a7c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -39,11 +39,8 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -using namespace llvm; -// FIXME: Remove this once soft-float is supported. -static cl::opt DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", -cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden); +using namespace llvm; static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -71,8 +68,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); - addRegisterClass(MVT::f32, &PPC::F4RCRegClass); - addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + if (!Subtarget.useSoftFloat()) { + addRegisterClass(MVT::f32, &PPC::F4RCRegClass); + addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + } // PowerPC has an i16 but no i8 (or i1) SEXTLOAD for (MVT VT : MVT::integer_valuetypes()) { @@ -106,8 +105,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); - AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, - isPPC64 ? MVT::i64 : MVT::i32); + AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); @@ -256,10 +255,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::BITCAST, MVT::f32, Expand); - setOperationAction(ISD::BITCAST, MVT::i32, Expand); - setOperationAction(ISD::BITCAST, MVT::i64, Expand); - setOperationAction(ISD::BITCAST, MVT::f64, Expand); + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::i64, Legal); + setOperationAction(ISD::BITCAST, MVT::f64, Legal); + } else { + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + } // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -328,6 +334,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); // We want to custom lower some of our intrinsics. 
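A note on the BITCAST hunk above: ISD::BITCAST between f64 and i64 (or f32 and i32) is a pure bit reinterpretation. POWER8's direct-move instructions (mtvsrd/mfvsrd) perform it in a single register move, which is why hasDirectMove() marks the node Legal; without them the legalizer's Expand action typically round-trips the value through a stack slot. A standalone sketch of the semantics involved (plain C++, not LLVM code):

#include <cstdint>
#include <cstring>

// The reinterpretation ISD::BITCAST denotes: same bits, new type. Expand
// emulates this with a store and reload; a direct move does it in-register.
uint64_t bitcastF64ToI64(double D) {
  uint64_t Bits;
  static_assert(sizeof(Bits) == sizeof(D), "bitcast requires equal sizes");
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}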
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -402,11 +410,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // will selectively turn on ones that can be effectively codegen'd. for (MVT VT : MVT::vector_valuetypes()) { // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); // Vector instructions introduced in P8 - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); } @@ -430,6 +438,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); + setOperationAction(ISD::SELECT_CC, VT, Promote); + AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); @@ -474,6 +484,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); @@ -516,7 +528,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } - setOperationAction(ISD::MUL, MVT::v4i32, Custom); + if (Subtarget.hasP8Altivec()) + setOperationAction(ISD::MUL, MVT::v4i32, Legal); + else + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -537,6 +553,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + } + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); + } + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); @@ -569,20 +600,33 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) + addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); + addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); + addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, 
&PPC::VSRCRegClass); - // VSX v2i64 only supports non-arithmetic operations. - setOperationAction(ISD::ADD, MVT::v2i64, Expand); - setOperationAction(ISD::SUB, MVT::v2i64, Expand); + if (Subtarget.hasP8Altivec()) { + setOperationAction(ISD::SHL, MVT::v2i64, Legal); + setOperationAction(ISD::SRA, MVT::v2i64, Legal); + setOperationAction(ISD::SRL, MVT::v2i64, Legal); + + setOperationAction(ISD::SETCC, MVT::v2i64, Legal); + } + else { + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + setOperationAction(ISD::SRA, MVT::v2i64, Expand); + setOperationAction(ISD::SRL, MVT::v2i64, Expand); - setOperationAction(ISD::SHL, MVT::v2i64, Expand); - setOperationAction(ISD::SRA, MVT::v2i64, Expand); - setOperationAction(ISD::SRL, MVT::v2i64, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + // VSX v2i64 only supports non-arithmetic operations. + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + } setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); @@ -606,8 +650,166 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } - if (Subtarget.hasP8Altivec()) + if (Subtarget.hasP8Altivec()) { addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); + addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); + } + } + + if (Subtarget.hasQPX()) { + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FREM, MVT::v4f64, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); + setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); + + setOperationAction(ISD::LOAD , MVT::v4f64, Custom); + setOperationAction(ISD::STORE , MVT::v4f64, Custom); + + setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4f64, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + + setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); + setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); + + setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); + setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + + setOperationAction(ISD::FNEG , MVT::v4f64, Legal); + setOperationAction(ISD::FABS , MVT::v4f64, Legal); + setOperationAction(ISD::FSIN , MVT::v4f64, Expand); + setOperationAction(ISD::FCOS , MVT::v4f64, Expand); + setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); + setOperationAction(ISD::FPOW , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); + setOperationAction(ISD::FEXP , MVT::v4f64, Expand); + setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); + + 
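For readers following this wall of setOperationAction calls: each one records a per-(opcode, type) decision that the SelectionDAG legalizer consults later. Legal nodes are matched directly by instruction selection, Custom nodes are routed to PPCTargetLowering::LowerOperation, Expand falls back to a generic decomposition (for the vector FSIN/FCOS above, scalarization into libm calls), and Promote retries in another type. A minimal standalone model of that table (hypothetical names; the real one lives in TargetLowering):

#include <map>
#include <utility>

enum class Action { Legal, Promote, Expand, Custom };

// One entry per (opcode, simple value type) pair, defaulting to Legal,
// mirroring how the constructor above marks QPX v4f64 FADD Legal but
// FSIN Expand.
class OpActionTable {
  std::map<std::pair<unsigned, unsigned>, Action> Actions;

public:
  void set(unsigned Opc, unsigned VT, Action A) { Actions[{Opc, VT}] = A; }
  Action get(unsigned Opc, unsigned VT) const {
    auto It = Actions.find({Opc, VT});
    return It == Actions.end() ? Action::Legal : It->second;
  }
};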
setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); + + setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); + setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); + + addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); + + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FREM, MVT::v4f32, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); + setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); + + setOperationAction(ISD::LOAD , MVT::v4f32, Custom); + setOperationAction(ISD::STORE , MVT::v4f32, Custom); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4f32, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); + + setOperationAction(ISD::FNEG , MVT::v4f32, Legal); + setOperationAction(ISD::FABS , MVT::v4f32, Legal); + setOperationAction(ISD::FSIN , MVT::v4f32, Expand); + setOperationAction(ISD::FCOS , MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); + setOperationAction(ISD::FPOW , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); + setOperationAction(ISD::FEXP , MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); + + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + + setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); + setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); + + addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); + + setOperationAction(ISD::AND , MVT::v4i1, Legal); + setOperationAction(ISD::OR , MVT::v4i1, Legal); + setOperationAction(ISD::XOR , MVT::v4i1, Legal); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4i1, Expand); + setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); + + setOperationAction(ISD::LOAD , MVT::v4i1, Custom); + setOperationAction(ISD::STORE , MVT::v4i1, Custom); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); + + setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); + + addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); + + setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); + setOperationAction(ISD::FROUND, MVT::v4f64, 
Legal); + + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); + + // These need to set FE_INEXACT, and so cannot be vectorized here. + setOperationAction(ISD::FRINT, MVT::v4f64, Expand); + setOperationAction(ISD::FRINT, MVT::v4f32, Expand); + + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + } else { + setOperationAction(ISD::FDIV, MVT::v4f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); + + setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + } } if (Subtarget.has64BitSupport()) @@ -621,8 +823,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } setBooleanContents(ZeroOrOneBooleanContent); - // Altivec instructions set fields to all zeros or all ones. - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + if (Subtarget.hasAltivec()) { + // Altivec instructions set fields to all zeros or all ones. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + } if (!isPPC64) { // These libcalls are not available in 32-bit. @@ -631,15 +836,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } - if (isPPC64) { - setStackPointerRegisterToSaveRestore(PPC::X1); - setExceptionPointerRegister(PPC::X3); - setExceptionSelectorRegister(PPC::X4); - } else { - setStackPointerRegisterToSaveRestore(PPC::R1); - setExceptionPointerRegister(PPC::R3); - setExceptionSelectorRegister(PPC::R4); - } + setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); @@ -721,7 +918,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, else setSchedulingPreference(Sched::Hybrid); - computeRegisterProperties(); + computeRegisterProperties(STI.getRegisterInfo()); // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses the same threshold of 128 bytes (= 32 word stores). @@ -733,6 +930,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; + } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { + // The A2 also benefits from (very) aggressive inlining of memcpy and + // friends. The overhead of a function call, even when warm, can be + // over one hundred cycles.
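The thresholds assigned in the next few lines bound how many inline stores SelectionDAG may emit before it falls back to calling the libc routine. In concrete terms (standalone C++; the struct and sizes are illustrative only):

#include <cstring>

struct Packet { char Bytes[64]; };

// With MaxStoresPerMemset raised to 128, a fixed-size memset like this one
// can be expanded into a short run of wide stores instead of a libcall,
// avoiding the roughly hundred-cycle call overhead described above.
void clearPacket(Packet &P) {
  std::memset(P.Bytes, 0, sizeof(P.Bytes));
}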
+ MaxStoresPerMemset = 128; + MaxStoresPerMemcpy = 128; + MaxStoresPerMemmove = 128; } } @@ -753,9 +957,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; - getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == MaxMaxAlign) @@ -766,7 +970,8 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. -unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { +unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { // Darwin passes everything on 4 byte boundary. if (Subtarget.isDarwin()) return 4; @@ -779,9 +984,13 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { return Align; } +bool PPCTargetLowering::useSoftFloat() const { + return Subtarget.useSoftFloat(); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return nullptr; + switch ((PPCISD::NodeType)Opcode) { + case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; @@ -802,10 +1011,12 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; + case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; + case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; @@ -816,14 +1027,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; + case PPCISD::MFVSR: return "PPCISD::MFVSR"; + case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; + case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; + case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; + case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; - case PPCISD::LARX: return "PPCISD::LARX"; - case PPCISD::STCX: return "PPCISD::STCX"; + case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; + case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; @@ -832,10 +1048,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case 
PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; - case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; - case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; - case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; + case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; @@ -851,12 +1065,28 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; - } -} - -EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { + case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; + case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; + case PPCISD::RFEBB: return "PPCISD::RFEBB"; + case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; + case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; + case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; + case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; + case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; + case PPCISD::QBFLT: return "PPCISD::QBFLT"; + case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; + } + return nullptr; +} + +EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, + EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; + + if (Subtarget.hasQPX()) + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + return VT.changeVectorElementTypeToInteger(); } @@ -892,11 +1122,11 @@ static bool isConstantOrUndef(int Op, int Val) { /// VPKUHUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical -/// inputs (1), and little-endian operantion with two different inputs (2). +/// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -923,11 +1153,11 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical -/// inputs (1), and little-endian operantion with two different inputs (2). +/// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -954,6 +1184,56 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, return true; } +/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a +/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the +/// current subtarget. 
+/// +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operations with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). +bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG) { + const PPCSubtarget& Subtarget = + static_cast(DAG.getSubtarget()); + if (!Subtarget.hasP8Vector()) + return false; + + bool IsLE = DAG.getDataLayout().isLittleEndian(); + if (ShuffleKind == 0) { + if (IsLE) + return false; + for (unsigned i = 0; i != 16; i += 4) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || + !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || + !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) + return false; + } else if (ShuffleKind == 2) { + if (!IsLE) + return false; + for (unsigned i = 0; i != 16; i += 4) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || + !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = IsLE ? 0 : 4; + for (unsigned i = 0; i != 8; i += 4) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || + !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || + !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || + !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || + !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) + return false; + } + return true; +} + /// isVMerge - Common function, used to match vmrg* shuffles. /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, @@ -976,13 +1256,13 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). -/// The ShuffleKind distinguishes between big-endian merges with two +/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped @@ -1001,13 +1281,13 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). -/// The ShuffleKind distinguishes between big-endian merges with two +/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). 
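Before the merge-high matcher below, a standalone restatement of the vmrgl/vmrgh mask test may help: a merge interleaves UnitSize-byte chunks of the two inputs, and the predicate simply checks every mask element against that pattern. A sketch with the mask as a plain array (hypothetical helper, not the LLVM API):

// Mask entries are -1 for undef, 0-15 for bytes of the first input and
// 16-31 for bytes of the second, as in LLVM's shufflevector numbering.
static bool constantOrUndef(int M, int Val) { return M < 0 || M == Val; }

bool isVMergeMask(const int Mask[16], unsigned UnitSize,
                  unsigned LHSStart, unsigned RHSStart) {
  for (unsigned I = 0; I != 8 / UnitSize; ++I)
    for (unsigned J = 0; J != UnitSize; ++J)
      if (!constantOrUndef(Mask[I * UnitSize * 2 + J],
                           LHSStart + J + I * UnitSize) ||
          !constantOrUndef(Mask[I * UnitSize * 2 + UnitSize + J],
                           RHSStart + J + I * UnitSize))
        return false;
  return true;
}

isVMRGHShuffleMask below plugs in (0, 0) or (0, 16) on big-endian targets and (8, 8) or (8, 24) on little-endian ones, where the register's high and low halves trade places.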
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); else return false; @@ -1024,10 +1304,103 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, } } +/** + * \brief Common function used to match vmrgew and vmrgow shuffles + * + * The indexOffset determines whether to look for even or odd words in + * the shuffle mask. This is based on the endianness of the target + * machine. + * - Little Endian: + * - Use offset of 0 to check for odd elements + * - Use offset of 4 to check for even elements + * - Big Endian: + * - Use offset of 0 to check for even elements + * - Use offset of 4 to check for odd elements + * A detailed description of the vector element ordering for little endian and + * big endian can be found at + * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html + * Targeting your applications - what little endian and big endian IBM XL C/C++ + * compiler differences mean to you + * + * The mask to the shuffle vector instruction specifies the indices of the + * elements from the two input vectors to place in the result. The elements are + * numbered in array-access order, starting with the first vector. These vectors + * are always of type v16i8, thus each vector will contain 16 elements of size + * 8 bits. More info on the shuffle vector can be found in the + * http://llvm.org/docs/LangRef.html#shufflevector-instruction + * Language Reference. + * + * The RHSStartValue indicates whether the same input vectors are used (unary) + * or two different input vectors are used, based on the following: + * - If the instruction uses the same vector for both inputs, the range of the + * indices will be 0 to 15. In this case, the RHSStart value passed should + * be 0. + * - If the instruction has two different vectors then the range of the + * indices will be 0 to 31. In this case, the RHSStart value passed should + * be 16 (indices 0-15 specify elements in the first vector while indices 16 + * to 31 specify elements in the second vector). + * + * \param[in] N The shuffle vector SD Node to analyze + * \param[in] IndexOffset Specifies whether to look for even or odd elements + * \param[in] RHSStartValue Specifies the starting index for the righthand input + * vector to the shuffle_vector instruction + * \return true iff this shuffle vector represents an even or odd word merge + */ +static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, + unsigned RHSStartValue) { + if (N->getValueType(0) != MVT::v16i8) + return false; + + for (unsigned i = 0; i < 2; ++i) + for (unsigned j = 0; j < 4; ++j) + if (!isConstantOrUndef(N->getMaskElt(i*4+j), + i*RHSStartValue+j+IndexOffset) || + !isConstantOrUndef(N->getMaskElt(i*4+j+8), + i*RHSStartValue+j+IndexOffset+8)) + return false; + return true; +} + +/** + * \brief Determine if the specified shuffle mask is suitable for the vmrgew or + * vmrgow instructions.
+ * + * \param[in] N The shuffle vector SD Node to analyze + * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) + * \param[in] ShuffleKind Identify the type of merge: + * - 0 = big-endian merge with two different inputs; + * - 1 = either-endian merge with two identical inputs; + * - 2 = little-endian merge with two different inputs (inputs are swapped for + * little-endian merges). + * \param[in] DAG The current SelectionDAG + * \return true iff this shuffle mask represents the specified even or odd word merge + */ +bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getDataLayout().isLittleEndian()) { + unsigned indexOffset = CheckEven ? 4 : 0; + if (ShuffleKind == 1) // Unary + return isVMerge(N, indexOffset, 0); + else if (ShuffleKind == 2) // swapped + return isVMerge(N, indexOffset, 16); + else + return false; + } + else { + unsigned indexOffset = CheckEven ? 0 : 4; + if (ShuffleKind == 1) // Unary + return isVMerge(N, indexOffset, 0); + else if (ShuffleKind == 0) // Normal + return isVMerge(N, indexOffset, 16); + else + return false; + } + return false; +} /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. -/// The ShuffleKind distinguishes between big-endian operations with two +/// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). @@ -1051,7 +1424,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, if (ShiftAmt < i) return -1; ShiftAmt -= i; - bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. @@ -1066,7 +1439,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, } else return -1; - if (ShuffleKind == 2 && isLE) + if (isLE) ShiftAmt = 16 - ShiftAmt; return ShiftAmt; @@ -1079,6 +1452,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); + // The consecutive indices need to specify an element, not part of two + // different elements. So abandon ship early if this isn't the case. + if (N->getMaskElt(0) % EltSize != 0) + return false; + // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); @@ -1102,29 +1480,13 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -/// isAllNegativeZeroVector - Returns true if all elements of build_vector -/// are -0.0. -bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) - return CFP->getValueAPF().isNegZero(); - - return false; -} - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
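getVSPLTImmediate, which follows, computes the lane number fed to vspltb/vsplth/vspltw. The only subtlety is endianness: the immediate counts elements from the big-endian left end of the register, so little-endian targets invert the array index. A standalone restatement (hypothetical helper):

// MaskElt0 is the first mask entry of a mask accepted by isSplatShuffleMask;
// EltSize is 1, 2, or 4 bytes, so a 16-byte register holds 16/EltSize lanes.
unsigned vspltImmediate(unsigned MaskElt0, unsigned EltSize,
                        bool IsLittleEndian) {
  unsigned Elt = MaskElt0 / EltSize;
  return IsLittleEndian ? (16 / EltSize) - 1 - Elt : Elt;
}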
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); - if (DAG.getTarget().getDataLayout()->isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; @@ -1171,23 +1533,23 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. - LeadingZero &= cast(UniquedVals[i])->isNullValue(); - LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); + LeadingZero &= isNullConstant(UniquedVals[i]); + LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry. if (LeadingZero) { if (!UniquedVals[Multiple-1].getNode()) - return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef + return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); - if (Val < 16) - return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) + if (Val < 16) // 0,0,0,4 -> vspltisw(4) + return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } if (LeadingOnes) { if (!UniquedVals[Multiple-1].getNode()) - return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef + return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) - return DAG.getTargetConstant(Val, MVT::i32); + return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } return SDValue(); @@ -1218,17 +1580,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); - // If the element value is larger than the splat value, cut it in half and - // check to see if the two halves are equal. Continue doing this until we - // get to ByteSize. This allows us to handle 0x01010101 as 0x01. - while (ValSizeInBytes > ByteSize) { - ValSizeInBytes >>= 1; - - // If the top half equals the bottom half, we're still ok. - if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != - (Value & ((1 << (8*ValSizeInBytes))-1))) - return SDValue(); - } + // If the element value is larger than the splat value, check if it consists + // of a repeated bit pattern of size ByteSize. + if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8)) + return SDValue(); // Properly sign extend the value. int MaskVal = SignExtend32(Value, ByteSize * 8); @@ -1238,10 +1593,40 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // Finally, if this value fits in a 5 bit sext field, return it if (SignExtend32<5>(MaskVal) == MaskVal) - return DAG.getTargetConstant(MaskVal, MVT::i32); + return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); return SDValue(); } +/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift +/// amount, otherwise return -1. +int PPC::isQVALIGNIShuffleMask(SDNode *N) { + EVT VT = N->getValueType(0); + if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) + return -1; + + ShuffleVectorSDNode *SVOp = cast(N); + + // Find the first non-undef value in the shuffle mask. + unsigned i; + for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) + /*search*/; + + if (i == 4) return -1; // all undef. 
+ + // Otherwise, check to see if the rest of the elements are consecutively + // numbered from this value. + unsigned ShiftAmt = SVOp->getMaskElt(i); + if (ShiftAmt < i) return -1; + ShiftAmt -= i; + + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 4; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) + return -1; + + return ShiftAmt; +} + //===----------------------------------------------------------------------===// // Addressing Mode Selection //===----------------------------------------------------------------------===// @@ -1264,7 +1649,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } - /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. @@ -1367,7 +1751,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { - Disp = DAG.getTargetConstant(imm, N.getValueType()); + Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); @@ -1407,7 +1791,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } else { Base = N.getOperand(0); } - Disp = DAG.getTargetConstant(imm, N.getValueType()); + Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); return true; } } @@ -1418,7 +1802,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // this as "d, 0" short Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { - Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); + Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; @@ -1431,16 +1815,17 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. - Disp = DAG.getTargetConstant((short)Addr, MVT::i32); + Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); - Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); + Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, + MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } - Disp = DAG.getTargetConstant(0, getPointerTy()); + Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); @@ -1501,9 +1886,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } else return false; - // PowerPC doesn't have preinc load/store instructions for vectors. - if (VT.isVector()) - return false; + // PowerPC doesn't have preinc load/store instructions for vectors (except + // for QPX, which does have preinc r+r forms). 
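The test below decides whether a load or store can use PPC's update-form addressing, in which one instruction both performs the access and writes the incremented address back to the base register (the lbzu/stwu family for scalars; as the comment above notes, QPX only has the r+r, i.e. register-indexed, update forms). A scalar C++ analogy for ISD::PRE_INC:

#include <cstddef>

// One fused step: the base pointer is advanced first and the access then
// uses the updated address, which is what PRE_INC indexed ops denote.
double loadPreIncrement(double *&Base, std::ptrdiff_t Step) {
  Base += Step;
  return *Base;
}

Selecting this form removes the separate pointer-update add that would otherwise precede the access in a loop.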
+ if (VT.isVector()) { + if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { + return false; + } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } + } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { @@ -1592,9 +1984,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { - EVT PtrVT = HiPart.getValueType(); - SDValue Zero = DAG.getConstant(0, PtrVT); SDLoc DL(HiPart); + EVT PtrVT = HiPart.getValueType(); + SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); @@ -1618,6 +2010,19 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } +static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, + SDValue GA) { + EVT VT = Is64Bit ? MVT::i64 : MVT::i32; + SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : + DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); + + SDValue Ops[] = { GA, Reg }; + return DAG.getMemIntrinsicNode( + PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, + false, 0); +} + SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); @@ -1629,8 +2034,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, SDLoc(CP), true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -1640,9 +2044,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (isPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); - SDLoc DL(CP); - return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, - DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + return getTOCEntry(DAG, SDLoc(CP), false, GA); } SDValue CPIHi = @@ -1661,8 +2063,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, SDLoc(JT), true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -1672,9 +2073,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (isPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); - SDLoc DL(GA); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA, - DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + return getTOCEntry(DAG, SDLoc(GA), false, GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1693,8 +2092,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, SDLoc(BASDN), true, GA); } unsigned MOHiFlag, 
MOLoFlag; @@ -1713,9 +2111,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); @@ -1804,8 +2205,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); - return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, DL, true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -1816,8 +2216,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); - return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, - DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32)); + return getTOCEntry(DAG, DL, false, GA); } SDValue GAHi = @@ -1874,7 +2273,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { unsigned Log2b = Log2_32(VT.getSizeInBits()); SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, - DAG.getConstant(Log2b, MVT::i32)); + DAG.getConstant(Log2b, dl, MVT::i32)); return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); } // Leave comparisons against 0 and -1 alone for now, since they're usually @@ -1894,7 +2293,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); - return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); + return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); } return SDValue(); } @@ -1903,7 +2302,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); @@ -1920,11 +2319,11 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, if (VT == MVT::i64) { // Check if GprIndex is even SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, - DAG.getConstant(0, MVT::i32), ISD::SETNE); + DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); // Align GprIndex to be even if it isn't GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, GprIndex); @@ -1932,7 +2331,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // fpr index is 1 byte after gpr SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, 
dl, MVT::i32)); // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, @@ -1941,10 +2340,10 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, - DAG.getConstant(8, MVT::i32)); + DAG.getConstant(8, dl, MVT::i32)); SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, - DAG.getConstant(4, MVT::i32)); + DAG.getConstant(4, dl, MVT::i32)); // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, @@ -1959,12 +2358,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // select overflow_area if index > 8 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, - DAG.getConstant(8, MVT::i32), ISD::SETLT); + DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); // adjustment constant gpr_index * 4/8 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, - DAG.getConstant(VT.isInteger() ? 4 : 8, + DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); // OurReg = RegSaveArea + RegConstant @@ -1974,12 +2373,12 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // Floating types are 32 bytes into RegSaveArea if (VT.isFloatingPoint()) OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, - DAG.getConstant(32, MVT::i32)); + DAG.getConstant(32, dl, MVT::i32)); // increase {f,g}pr_index by 1 (or 2 if VT is i64) SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, - DAG.getConstant(VT == MVT::i64 ? 2 : 1, + DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32)); InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, @@ -1993,7 +2392,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // increase overflow_area by 4/8 if gpr/fpr > 8 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, DAG.getConstant(VT.isInteger() ? 4 : 8, - MVT::i32)); + dl, MVT::i32)); OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); @@ -2015,8 +2414,8 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, MVT::i32), 8, false, true, - MachinePointerInfo(), MachinePointerInfo()); + DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, + false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, @@ -2032,11 +2431,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - Type *IntPtrTy = - DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( - *DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2045,7 +2442,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Entry.Node = Trmp; Args.push_back(Entry); // TrampSize == (isPPC64 ? 48 : 40); - Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, + Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, isPPC64 ? 
MVT::i64 : MVT::i32); Args.push_back(Entry); @@ -2073,7 +2470,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2105,12 +2502,10 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // */ // } va_list[1]; + SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); + SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); - SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); - SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); - - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); @@ -2118,13 +2513,13 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; - SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); + SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; - SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); + SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); uint64_t FPROffset = 1; - SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); + SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); @@ -2162,7 +2557,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -// Function whose sole purpose is to kill compiler warnings +// Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; @@ -2186,7 +2581,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); - unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + unsigned RegNum = State.getFirstUnallocated(ArgRegs); // Skip one register if the first unallocated register has an even register // number and there are still argument registers available which have not been @@ -2214,7 +2609,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, const unsigned NumArgRegs = array_lengthof(ArgRegs); - unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + unsigned RegNum = State.getFirstUnallocated(ArgRegs); // If there is only one Floating-point register left we need to put both f64 // values of a split ppc_fp128 value on the stack. @@ -2229,16 +2624,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -/// GetFPR - Get the set of FP registers that should be allocated for arguments, +/// FPR - The set of FP registers that should be allocated for arguments, /// on Darwin. 
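The FPR table introduced just below (and its QPX sibling QFPR) drives argument assignment in the lowering code that follows: each floating-point argument consumes the next free entry, and once the table is exhausted the value travels in the parameter save area on the stack. A standalone sketch of that discipline (hypothetical helper; plain register numbers stand in for F1-F13):

#include <cstddef>

static const unsigned FPArgRegs[] = {1, 2, 3, 4,  5,  6,  7,
                                     8, 9, 10, 11, 12, 13};

// Returns the register number for the Idx'th FP argument, or 0 when the
// registers are exhausted and a stack slot must be used instead.
unsigned selectFPArgReg(std::size_t Idx) {
  const std::size_t N = sizeof(FPArgRegs) / sizeof(FPArgRegs[0]);
  return Idx < N ? FPArgRegs[Idx] : 0;
}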
-static const MCPhysReg *GetFPR() { - static const MCPhysReg FPR[] = { - PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 - }; +static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, + PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, + PPC::F11, PPC::F12, PPC::F13}; - return FPR; -} +/// QFPR - The set of QPX registers that should be allocated for arguments. +static const MCPhysReg QFPR[] = { + PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. @@ -2266,8 +2661,13 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || - ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || + ArgVT == MVT::v1i128) Align = 16; + // QPX vector types stored in double-precision are padded to a 32 byte + // boundary. + else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) + Align = 32; // ByVal parameters are aligned as requested. if (Flags.isByVal()) { @@ -2306,7 +2706,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, - unsigned &AvailableVRs) { + unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. @@ -2330,14 +2730,19 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { - if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || + // QPX registers overlap with the scalar FP registers. + (HasQPX && (ArgVT == MVT::v4f32 || + ArgVT == MVT::v4f64 || + ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || - ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || + ArgVT == MVT::v1i128) if (AvailableVRs > 0) { --AvailableVRs; return false; @@ -2420,7 +2825,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -2453,7 +2858,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::GPRCRegClass; break; case MVT::f32: - RC = &PPC::F4RCRegClass; + if (Subtarget.hasP8Vector()) + RC = &PPC::VSSRCRegClass; + else + RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) @@ -2464,13 +2872,21 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: - case MVT::v4f32: RC = &PPC::VRRCRegClass; break; + case MVT::v4f32: + RC = Subtarget.hasQPX() ? 
&PPC::QSRCRegClass : &PPC::VRRCRegClass; + break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VSHRCRegClass; break; + case MVT::v4f64: + RC = &PPC::QFRCRegClass; + break; + case MVT::v4i1: + RC = &PPC::QBRCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. @@ -2538,13 +2954,12 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - if (DisablePPCFloatInVariadic) - NumFPArgRegs = 0; - FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, - NumGPArgRegs)); - FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, - NumFPArgRegs)); + if (Subtarget.useSoftFloat()) + NumFPArgRegs = 0; + + FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); + FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + @@ -2571,7 +2986,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store - SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } @@ -2590,7 +3005,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } @@ -2637,7 +3052,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. 
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -2648,9 +3063,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -2663,6 +3075,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); + const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area @@ -2675,11 +3088,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; - for (unsigned i = 0, e = Ins.size(); i != e; ++i) + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (Ins[i].Flags.isNest()) + continue; + if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, - NumBytes, AvailableFPRs, AvailableVRs)) + NumBytes, AvailableFPRs, AvailableVRs, + Subtarget.hasQPX())) HasParameterArea = true; + } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, @@ -2687,6 +3105,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -2698,9 +3117,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; - std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[ArgNo].OrigArgIndex; - + if (Ins[ArgNo].isOrigArg()) { + std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[ArgNo].getOrigArgIndex(); + } // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. @@ -2723,6 +3143,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { + assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); + if (CallConv == CallingConv::Fast) ComputeArgOffset(); @@ -2763,7 +3185,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // address of the enclosing doubleword on big-endian systems. SDValue Arg = FIN; if (!isLittleEndian) { - SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT); + SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); } InVals.push_back(Arg); @@ -2777,15 +3199,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, - MachinePointerInfo(FuncArg), - ObjType, false, false, 0); + MachinePointerInfo(&*FuncArg), ObjType, + false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. - Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg), - false, false, 0); + Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg), false, false, 0); } MemOps.push_back(Store); @@ -2809,12 +3231,12 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { - SDValue Off = DAG.getConstant(j, PtrVT); + SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, - MachinePointerInfo(FuncArg, j), - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(&*FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -2827,6 +3249,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. + unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); + + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -2858,7 +3291,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned VReg; if (ObjectVT == MVT::f32) - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], + Subtarget.hasP8Vector() + ? &PPC::VSSRCRegClass + : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass @@ -2879,7 +3315,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (ObjectVT == MVT::f32) { if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, - DAG.getConstant(32, MVT::i32)); + DAG.getConstant(32, dl, MVT::i32)); ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } @@ -2907,6 +3343,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + case MVT::v1i128: + if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. @@ -2925,6 +3363,36 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; + } // not QPX + + assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && + "Invalid QPX parameter type"); + /* fall through */ + + case MVT::v4f64: + case MVT::v4i1: + // QPX vectors are treated like their scalar floating-point subregisters + // (except that they're larger). + unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 
16 : 32; + if (QFPR_idx != Num_QFPR_Regs) { + const TargetRegisterClass *RC; + switch (ObjectVT.getSimpleVT().SimpleTy) { + case MVT::v4f64: RC = &PPC::QFRCRegClass; break; + case MVT::v4f32: RC = &PPC::QSRCRegClass; break; + default: RC = &PPC::QBRCRegClass; break; + } + + unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++QFPR_idx; + } else { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + needsLoad = true; + } + if (CallConv != CallingConv::Fast || needsLoad) + ArgOffset += Sz; + break; } // We need to load the argument to a virtual register if we determined @@ -2976,7 +3444,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store - SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); + SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } @@ -3001,7 +3469,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && @@ -3020,9 +3488,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin( PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -3101,9 +3566,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; - std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); - CurArgIdx = Ins[ArgNo].OrigArgIndex; - + if (Ins[ArgNo].isOrigArg()) { + std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); + CurArgIdx = Ins[ArgNo].getOrigArgIndex(); + } unsigned CurArgOffset = ArgOffset; // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. @@ -3124,6 +3590,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { + assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); + // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -3146,7 +3614,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? 
MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg), + MachinePointerInfo(&*FuncArg), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -3169,9 +3637,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, j), - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -3342,7 +3810,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store - SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } @@ -3420,8 +3888,9 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { SignExtend32<26>(Addr) != Addr) return nullptr; // Top 6 bits have to be sext of immediate. - return DAG.getConstant((int)C->getZExtValue() >> 2, - DAG.getTargetLoweringInfo().getPointerTy()).getNode(); + return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op), + DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout())).getNode(); } namespace { @@ -3433,7 +3902,6 @@ struct TailCallArgumentInfo { TailCallArgumentInfo() : FrameIdx(0) {} }; - } /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. @@ -3448,9 +3916,10 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains.push_back(DAG.getStore( + Chain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, 0)); } } @@ -3475,9 +3944,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); - Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - MachinePointerInfo::getFixedStack(NewRetAddr), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, OldRetAddr, NewRetAddrFrIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr), + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. 
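isBLACompatibleAddress above accepts an absolute call target only if it is word-aligned and survives SignExtend32<26>; a small standalone model of that test (fitsBLA is an illustrative name, not an LLVM function):

#include <cstdint>

static bool fitsBLA(int32_t Addr) {
  if (Addr & 3)
    return false;                                   // must be 4-byte aligned
  int32_t SExt = int32_t(uint32_t(Addr) << 6) >> 6; // SignExtend32<26>(Addr)
  return SExt == Addr; // if equal, Addr >> 2 fits the 26-bit LI field of BLA
}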
@@ -3486,9 +3956,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - MachinePointerInfo::getFixedStack(NewFPIdx), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, OldFP, NewFramePtrIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx), + false, false, 0); } } return Chain; @@ -3552,9 +4023,9 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(), + false, false, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -3567,7 +4038,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, bool isVector, SmallVectorImpl &MemOpChains, SmallVectorImpl &TailCallArguments, SDLoc dl) { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; @@ -3576,7 +4047,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, - DAG.getConstant(ArgOffset, PtrVT)); + DAG.getConstant(ArgOffset, dl, PtrVT)); } MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); @@ -3607,8 +4078,8 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, isPPC64, isDarwinABI, dl); // Emit callseq_end just before tailcall node. - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), + DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -3629,7 +4100,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) { static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, - bool isTailCall, bool IsPatchPoint, + bool isTailCall, bool IsPatchPoint, bool hasNest, SmallVectorImpl > &RegsToPass, SmallVectorImpl &Ops, std::vector &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { @@ -3638,7 +4109,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 
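EmitTailCallStoreFPAndRetAddr above re-stores the saved LR (and, outside SVR4, the FP) because a tail call that needs a different frame size shifts those slots. A sketch of the slot arithmetic, under the assumption (taken from the surrounding code, not shown in this hunk) that SPDiff is the signed delta applied to the stack pointer; the helper name is hypothetical:

static int newRetAddrSlotOffset(int ReturnSaveOffset, int SPDiff) {
  // A callee needing a larger argument area makes SPDiff negative, moving
  // the relocated link-register save slot further down the stack.
  return ReturnSaveOffset + SPDiff;
}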
@@ -3660,8 +4131,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if ((DAG.getTarget().getRelocationModel() != Reloc::Static && (Subtarget.getTargetTriple().isMacOSX() && Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) || + !G->getGlobal()->isStrongDefinitionForLinker()) || (Subtarget.isTargetELF() && !isPPC64 && !G->getGlobal()->hasLocalLinkage() && DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { @@ -3754,13 +4224,13 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, false, false, LoadsInv, 8); // Load environment pointer into r11. - SDValue PtrOff = DAG.getIntPtrConstant(16); + SDValue PtrOff = DAG.getIntPtrConstant(16, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), false, false, LoadsInv, 8); - SDValue TOCOff = DAG.getIntPtrConstant(8); + SDValue TOCOff = DAG.getIntPtrConstant(8, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), false, false, @@ -3772,11 +4242,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Chain = TOCVal.getValue(0); InFlag = TOCVal.getValue(1); - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. + if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, + InFlag); - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); + Chain = EnvVal.getValue(0); + InFlag = EnvVal.getValue(1); + } MTCTROps[0] = Chain; MTCTROps[1] = LoadFuncPtr; @@ -3794,7 +4268,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI) + if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -3808,7 +4282,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, } // If this is a tail call add stack pointer delta. if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); + Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. 
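The 0/8/16 offsets loaded in the indirect-call sequence above are the three doublewords of a PPC64 ELFv1 function descriptor; the layout, stated here from the ABI rather than from this patch (struct name illustrative):

#include <cstdint>

struct PPC64ELFv1FunctionDescriptor {
  uint64_t EntryPoint;  // offset 0:  moved to CTR for the bctrl
  uint64_t TOCBase;     // offset 8:  loaded into r2 (TOCOff above)
  uint64_t Environment; // offset 16: loaded into r11 (PtrOff above), unless
                        //            a 'nest' parameter already owns r11
};

static_assert(sizeof(PPC64ELFv1FunctionDescriptor) == 24,
              "three doublewords, as the 8 and 16 byte offsets assume");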
@@ -3830,8 +4304,7 @@ static bool isLocalCall(const SDValue &Callee) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) - return !G->getGlobal()->isDeclaration() && - !G->getGlobal()->isWeakForLinker(); + return G->getGlobal()->isStrongDefinitionForLinker(); return false; } @@ -3884,7 +4357,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, bool IsPatchPoint, - SelectionDAG &DAG, + bool hasNest, SelectionDAG &DAG, SmallVector, 8> &RegsToPass, SDValue InFlag, SDValue Chain, @@ -3897,8 +4370,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, std::vector NodeTys; SmallVector Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, IsPatchPoint, RegsToPass, - Ops, NodeTys, CS, Subtarget); + SPDiff, isTailCall, IsPatchPoint, hasNest, + RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) @@ -3913,7 +4386,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -3929,6 +4403,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, isa(Callee)) && "Expecting an global address, external symbol, absolute value or register"); + DAG.getMachineFunction().getFrameInfo()->setHasTailCall(); return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); } @@ -3955,10 +4430,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); - SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); // The address needs to go after the chain input but before the flag (or @@ -3974,8 +4449,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(BytesCalleePops, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), + DAG.getIntPtrConstant(BytesCalleePops, dl, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -4119,7 +4594,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... 
// These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; @@ -4159,8 +4634,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Memory reserved in the local variable space of the callers stack frame. unsigned LocMemOffset = ByValVA.getLocMemOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. @@ -4196,8 +4672,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, unsigned LocMemOffset = VA.getLocMemOffset(); if (!isTailCall) { - SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), @@ -4238,7 +4715,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -4277,8 +4755,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); + bool hasNest = false; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); @@ -4302,13 +4781,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned &QFPR_idx = FPR_idx; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -4321,6 +4799,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); + const unsigned NumQFPRs = NumFPRs; // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. 
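NumBytes above starts from getLinkageSize(), which differs between the 64-bit ELF ABIs. For reference, stated from the ABI documents rather than from this patch (helper name illustrative):

// ELFv1 reserves six doublewords (back chain, CR save, LR save, two
// reserved doublewords, TOC save); ELFv2 drops the two reserved ones,
// leaving a 32-byte linkage area.
static unsigned ppc64LinkageSize(bool IsELFv2) { return IsELFv2 ? 32 : 48; }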
@@ -4332,6 +4811,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; + if (Flags.isNest()) + continue; + if (CallConv == CallingConv::Fast) { if (Flags.isByVal()) NumGPRsUsed += (Flags.getByValSize()+7)/8; @@ -4344,20 +4826,33 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (++NumGPRsUsed <= NumGPRs) continue; break; - case MVT::f32: - case MVT::f64: - if (++NumFPRsUsed <= NumFPRs) - continue; - break; - case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + case MVT::v1i128: if (++NumVRsUsed <= NumVRs) continue; break; + case MVT::v4f32: + // When using QPX, this is handled like a FP register, otherwise, it + // is an Altivec register. + if (Subtarget.hasQPX()) { + if (++NumFPRsUsed <= NumFPRs) + continue; + } else { + if (++NumVRsUsed <= NumVRs) + continue; + } + break; + case MVT::f32: + case MVT::f64: + case MVT::v4f64: // QPX + case MVT::v4i1: // QPX + if (++NumFPRsUsed <= NumFPRs) + continue; + break; } } @@ -4397,7 +4892,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; @@ -4441,7 +4936,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; - PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); }; @@ -4498,7 +4993,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (GPR_idx == NumGPRs && Size < 8) { SDValue AddPtr = PtrOff; if (!isLittleEndian) { - SDValue Const = DAG.getConstant(PtrByteSize - Size, + SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } @@ -4538,7 +5033,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // parameter save area instead of a new local variable. SDValue AddPtr = PtrOff; if (!isLittleEndian) { - SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); + SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, @@ -4560,7 +5055,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // For aggregates larger than PtrByteSize, copy the pieces of the // object that fit into registers from the parameter save area. for (unsigned j=0; jgetTOCSaveOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, - MachinePointerInfo::getStack(TOCSaveOffset), - false, false, 0); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), + false, false, 0); // In the ELFv2 ABI, R12 must contain the address of an indirect callee. 
// This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. @@ -4807,9 +5366,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, - RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, - NumBytes, Ins, InVals, CS); + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest, + DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, + SPDiff, NumBytes, Ins, InVals, CS); } SDValue @@ -4825,7 +5384,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned NumOps = Outs.size(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -4901,7 +5460,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); SDValue CallSeqStart = Chain; @@ -4935,8 +5494,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const MCPhysReg *FPR = GetFPR(); - static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -4959,7 +5516,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // register cannot be found for it. SDValue PtrOff; - PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); @@ -4988,7 +5545,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; } else { - SDValue Const = DAG.getConstant(PtrByteSize - Size, + SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, @@ -5009,7 +5566,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // copy the pieces of the object that fit into registers from the // parameter save area. 
for (unsigned j=0; jgetBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); - SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, - BasePtr, MVT::i8, MMO); + SDValue NewLD = + DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, + BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; @@ -5401,6 +5976,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(1).getValueType().isVector()) + return LowerVectorStore(Op, DAG); + assert(Op.getOperand(1).getValueType() == MVT::i1 && "Custom lowering only for i1 stores"); @@ -5414,7 +5992,8 @@ SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), + Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } @@ -5442,6 +6021,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { if (!DAG.getTarget().Options.NoInfsFPMath || !DAG.getTarget().Options.NoNaNsFPMath) return Op; + // TODO: Propagate flags from the select rather than global settings. + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); ISD::CondCode CC = cast(Op.getOperand(4))->get(); @@ -5492,7 +6075,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -5502,25 +6085,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -5560,7 +6143,8 @@ void 
PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); - MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. SDValue Chain; @@ -5579,7 +6163,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, // add in a bias. if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, - DAG.getConstant(4, FIPtr.getValueType())); + DAG.getConstant(4, dl, FIPtr.getValueType())); MPI = MPI.getWithOffset(4); } @@ -5588,8 +6172,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, RLI.MPI = MPI; } +/// \brief Custom lowers floating point to integer conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + SDValue Tmp; + switch (Op.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); + case MVT::i32: + Tmp = DAG.getNode( + Op.getOpcode() == ISD::FP_TO_SINT + ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); + break; + case MVT::i64: + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); + break; + } + return Tmp; +} + SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { + if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) + return LowerFP_TO_INTDirectMove(Op, DAG, dl); + ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); @@ -5667,17 +6289,78 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } +/// \brief Custom lowers integer to floating point conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert((Op.getValueType() == MVT::f32 || + Op.getValueType() == MVT::f64) && + "Invalid floating point type as target of conversion"); + assert(Subtarget.hasFPCVT() && + "Int to FP conversions with direct moves require FPCVT"); + SDValue FP; + SDValue Src = Op.getOperand(0); + bool SinglePrec = Op.getValueType() == MVT::f32; + bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; + bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; + unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : + (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); + + if (WordInt) { + FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, + dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? 
MVT::f32 : MVT::f64, FP); + } + else { + FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + + return FP; +} + SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + + if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { + if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) + return SDValue(); + + SDValue Value = Op.getOperand(0); + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, + FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + if (Op.getValueType() != MVT::v4f64) + Value = DAG.getNode(ISD::FP_ROUND, dl, + Op.getValueType(), Value, + DAG.getIntPtrConstant(1, dl)); + return Value; + } + // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i1) return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), - DAG.getConstantFP(1.0, Op.getValueType()), - DAG.getConstantFP(0.0, Op.getValueType())); + DAG.getConstantFP(1.0, dl, Op.getValueType()), + DAG.getConstantFP(0.0, dl, Op.getValueType())); + + // If we have direct moves, we can do all the conversion, skip the store/load + // however, without FPCVT we can't do most conversions. + if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT()) + return LowerINT_TO_FPDirectMove(Op, DAG, dl); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -5716,12 +6399,12 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // bit 12 (value 2048) is set instead, so that the final rounding // to single-precision gets the correct result. SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, - SINT, DAG.getConstant(2047, MVT::i64)); + SINT, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::ADD, dl, MVT::i64, - Round, DAG.getConstant(2047, MVT::i64)); + Round, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); Round = DAG.getNode(ISD::AND, dl, MVT::i64, - Round, DAG.getConstant(-2048, MVT::i64)); + Round, DAG.getConstant(-2048, dl, MVT::i64)); // However, we cannot use that value unconditionally: if the magnitude // of the input value is small, the bit-twiddling we did above might @@ -5732,11 +6415,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // bits are all sign-bit copies, and use the rounded value computed // above otherwise. 
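The Round computation above is easier to audit in scalar form; this is a direct transcription (illustrative helper name), showing that any set bit among bits 0-10 becomes a single sticky bit 11, so the later f64-to-f32 rounding cannot round a second time:

#include <cstdint>

static int64_t adjustForSingleRounding(int64_t SInt) {
  int64_t Round = SInt & 2047; // isolate bits 0-10
  Round += 2047;               // carries into bit 11 iff any of them was set
  Round |= SInt;
  return Round & ~2047;        // clear bits 0-10, keep the sticky bit
}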
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, - SINT, DAG.getConstant(53, MVT::i32)); + SINT, DAG.getConstant(53, dl, MVT::i32)); Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, - Cond, DAG.getConstant(1, MVT::i64)); + Cond, DAG.getConstant(1, dl, MVT::i64)); Cond = DAG.getSetCC(dl, MVT::i32, - Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); + Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } @@ -5776,22 +6459,23 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; - RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; MachineMemOperand *MMO = @@ -5809,7 +6493,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, - MVT::f32, FP, DAG.getIntPtrConstant(0)); + MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); return FP; } @@ -5821,7 +6505,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { @@ -5832,16 +6516,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; - RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; } @@ -5866,20 +6552,23 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Op.getOperand(0)); // STD the extended value into the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); // Load the value as a double. 
- Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, false, 0); + Ld = DAG.getLoad( + MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, false, 0); } // FCFID it and return it. SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) - FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); + FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, + DAG.getIntPtrConstant(0, dl)); return FP; } @@ -5907,7 +6596,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { @@ -5923,7 +6612,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, StackSlot, MachinePointerInfo(), false, false,0); // Load FP Control Word from low 32 bits of stack slot. - SDValue Four = DAG.getConstant(4, PtrVT); + SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), false, false, false, 0); @@ -5931,14 +6620,14 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // Transform as necessary SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, - CWD, DAG.getConstant(3, MVT::i32)); + CWD, DAG.getConstant(3, dl, MVT::i32)); SDValue CWD2 = DAG.getNode(ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::XOR, dl, MVT::i32, - CWD, DAG.getConstant(3, MVT::i32)), - DAG.getConstant(3, MVT::i32)), - DAG.getConstant(1, MVT::i32)); + CWD, DAG.getConstant(3, dl, MVT::i32)), + DAG.getConstant(3, dl, MVT::i32)), + DAG.getConstant(1, dl, MVT::i32)); SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); @@ -5963,12 +6652,12 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, - DAG.getConstant(BitWidth, AmtVT), Amt); + DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, - DAG.getConstant(-BitWidth, AmtVT)); + DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); @@ -5992,12 +6681,12 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, - DAG.getConstant(BitWidth, AmtVT), Amt); + DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, - DAG.getConstant(-BitWidth, AmtVT)); + DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); @@ -6020,15 +6709,15 @@ 
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
   EVT AmtVT = Amt.getValueType();
 
   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
-                             DAG.getConstant(BitWidth, AmtVT), Amt);
+                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
-                             DAG.getConstant(-BitWidth, AmtVT));
+                             DAG.getConstant(-BitWidth, dl, AmtVT));
   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
-  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                   Tmp4, Tmp6, ISD::SETLE);
   SDValue OutOps[] = { OutLo, OutHi };
   return DAG.getMergeValues(OutOps, dl);
@@ -6044,7 +6733,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                            SelectionDAG &DAG, SDLoc dl) {
   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
 
-  static const EVT VTys[] = { // canonical VT to use for each size.
+  static const MVT VTys[] = { // canonical VT to use for each size.
     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
   };
@@ -6057,7 +6746,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
   EVT CanonicalVT = VTys[SplatSize-1];
 
   // Build a canonical splat for this value.
-  SDValue Elt = DAG.getConstant(Val, MVT::i32);
+  SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
   SmallVector<SDValue, 8> Ops;
   Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
@@ -6071,7 +6760,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
                                 EVT DestVT = MVT::Other) {
   if (DestVT == MVT::Other) DestVT = Op.getValueType();
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
-                     DAG.getConstant(IID, MVT::i32), Op);
+                     DAG.getConstant(IID, dl, MVT::i32), Op);
 }
 
 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
@@ -6081,7 +6770,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                 EVT DestVT = MVT::Other) {
   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
-                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
+                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
 }
 
 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
@@ -6091,10 +6780,9 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                 SDLoc dl, EVT DestVT = MVT::Other) {
   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
-                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
 }
 
-
 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
 /// amount. The result has the specified value type.
 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
@@ -6121,12 +6809,134 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
 
+  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+    // We first build an i32 vector, load it into a QPX register,
+    // then convert it to a floating-point vector and compare it
+    // to a zero vector to get the boolean result.
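One assumption behind the v4i1 path that starts below is worth spelling out: QPX keeps a boolean vector in a floating-point register, with the sign of each lane carrying the truth value, which is why the constant case encodes lanes as -1.0f (false) and +1.0f (true). A plain C++ sketch of the lane encoding (not LLVM API):

#include <array>

static std::array<float, 4> encodeQPXBoolLanes(const std::array<bool, 4> &B) {
  std::array<float, 4> Lanes;
  for (unsigned i = 0; i < 4; ++i)
    Lanes[i] = B[i] ? 1.0f : -1.0f; // sign carries the boolean state
  return Lanes;
}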
+    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+    MachinePointerInfo PtrInfo =
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+    EVT PtrVT = getPointerTy(DAG.getDataLayout());
+    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+    assert(BVN->getNumOperands() == 4 &&
+      "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+    bool IsConst = true;
+    for (unsigned i = 0; i < 4; ++i) {
+      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+        IsConst = false;
+        break;
+      }
+    }
+
+    if (IsConst) {
+      Constant *One =
+        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+      Constant *NegOne =
+        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+      SmallVector<Constant*, 4> CV(4, NegOne);
+      for (unsigned i = 0; i < 4; ++i) {
+        if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+        else if (isNullConstant(BVN->getOperand(i)))
+          continue;
+        else
+          CV[i] = One;
+      }
+
+      Constant *CP = ConstantVector::get(CV);
+      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
+                                          16 /* alignment */);
+
+      SmallVector<SDValue, 2> Ops;
+      Ops.push_back(DAG.getEntryNode());
+      Ops.push_back(CPIdx);
+
+      SmallVector<EVT, 2> ValueVTs;
+      ValueVTs.push_back(MVT::v4i1);
+      ValueVTs.push_back(MVT::Other); // chain
+      SDVTList VTs = DAG.getVTList(ValueVTs);
+
+      return DAG.getMemIntrinsicNode(
+          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
+          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+    }
+
+    SmallVector<SDValue, 4> Stores;
+    for (unsigned i = 0; i < 4; ++i) {
+      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+      unsigned Offset = 4*i;
+      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+      if (StoreSize > 4) {
+        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+                                           BVN->getOperand(i), Idx,
+                                           PtrInfo.getWithOffset(Offset),
+                                           MVT::i32, false, false, 0));
+      } else {
+        SDValue StoreValue = BVN->getOperand(i);
+        if (StoreSize < 4)
+          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+                                      StoreValue, Idx,
+                                      PtrInfo.getWithOffset(Offset),
+                                      false, false, 0));
+      }
+    }
+
+    SDValue StoreChain;
+    if (!Stores.empty())
+      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+    else
+      StoreChain = DAG.getEntryNode();
+
+    // Now load from v4i32 into the QPX register; this will extend it to
+    // v4i64 but not yet convert it to a floating point. Nevertheless, this
+    // is typed as v4f64 because the QPX register integer states are not
+    // explicitly represented.
+ + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32)); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::v4f64); + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32), + LoadedVect); + + SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64); + FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPZeros, FPZeros, FPZeros, FPZeros); + + return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); + } + + // All other QPX vectors are handled by generic code. + if (Subtarget.hasQPX()) + return SDValue(); + // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) || SplatBitSize > 32) + HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || + SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -6139,7 +6949,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (SplatBits == 0) { // Canonicalize all zero vectors to be v4i32. if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { - SDValue Z = DAG.getConstant(0, MVT::i32); + SDValue Z = DAG.getConstant(0, dl, MVT::i32); Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); } @@ -6152,7 +6962,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (SextVal >= -16 && SextVal <= 15) return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); - // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: @@ -6166,10 +6975,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // To avoid having these optimizations undone by constant folding, // we convert to a pseudo that will be expanded later into one of // the above forms. - SDValue Elt = DAG.getConstant(SextVal, MVT::i32); + SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); EVT VT = (SplatSize == 1 ? MVT::v16i8 : (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); - SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); + SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); if (VT == Op.getValueType()) return RetVal; @@ -6193,22 +7002,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } - // The remaining cases assume either big endian element order or - // a splat-size that equates to the element size of the vector - // to be built. An example that doesn't work for little endian is - // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits - // and a vector element size of 16 bits. The code below will - // produce the vector in big endian element order, which for little - // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. - - // For now, just avoid these optimizations in that case. - // FIXME: Develop correct optimizations for LE with mismatched - // splat and element sizes. 
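The even-value case that PPCISD::VADD_SPLAT covers (per the comment above) rests on a small arithmetic fact; a standalone check:

#include <cassert>

int main() {
  // An even splat constant in [-32, 30] is outside vsplti's [-16, 15]
  // immediate range, but its half is representable, and adding the halved
  // splat to itself (vaddu*m) restores the original value.
  for (int V = -32; V <= 30; V += 2) {
    int Half = V / 2;
    assert(-16 <= Half && Half <= 15);
    assert(Half + Half == V);
  }
  return 0;
}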
- - if (Subtarget.isLittleEndian() && - SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) - return SDValue(); - // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -6272,17 +7065,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // t = vsplti c, result = vsldoi t, t, 1 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); - return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); + unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; + return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); - return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); + unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; + return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); - return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); + unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; + return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } } @@ -6379,6 +7175,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + if (Subtarget.hasQPX()) { + if (VT.getVectorNumElements() != 4) + return SDValue(); + + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + + int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); + if (AlignIdx != -1) { + return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, + DAG.getConstant(AlignIdx, dl, MVT::i32)); + } else if (SVOp->isSplat()) { + int SplatIdx = SVOp->getSplatIndex(); + if (SplatIdx >= 4) { + std::swap(V1, V2); + SplatIdx -= 4; + } + + // FIXME: If SplatIdx == 0 and the input came from a load, then there is + // nothing to do. + + return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, + DAG.getConstant(SplatIdx, dl, MVT::i32)); + } + + // Lower this into a qvgpci/qvfperm pair. + + // Compute the qvgpci literal + unsigned idx = 0; + for (unsigned i = 0; i < 4; ++i) { + int m = SVOp->getMaskElt(i); + unsigned mm = m >= 0 ? (unsigned) m : i; + idx |= mm << (3-i)*3; + } + + SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, + DAG.getConstant(idx, dl, MVT::i32)); + return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); + } + // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. 
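For QPX shuffles that are neither a qvaligni nor a splat, the code above synthesizes a qvfperm control register with qvgpci; the literal packs four 3-bit source-lane selectors, output lane 0 in the most significant position. A standalone recomputation of the same literal:

    #include <cstdio>

    unsigned qvgpciLiteral(const int Mask[4]) {
      unsigned Idx = 0;
      for (unsigned i = 0; i < 4; ++i) {
        unsigned MM = Mask[i] >= 0 ? (unsigned)Mask[i] : i; // undef -> identity
        Idx |= MM << (3 - i) * 3;
      }
      return Idx;
    }

    int main() {
      const int Reverse[4] = {3, 2, 1, 0}; // reverse all four lanes
      std::printf("%u\n", qvgpciLiteral(Reverse)); // 3<<9 | 2<<6 | 1<<3 = 1672
    }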
@@ -6394,7 +7229,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { + PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } @@ -6411,7 +7250,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) + PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our @@ -6490,10 +7333,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) - ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), - MVT::i32)); + ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), + dl, MVT::i32)); else - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, MVT::i32)); } @@ -6507,11 +7350,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, V1, V2, VPermMask); } -/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an -/// altivec comparison. If it is, return true and fill in Opc/isDot with +/// getVectorCompareInfo - Given an intrinsic, return false if it is not a +/// vector comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. 
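When no single permute-immediate instruction matches (including the P8 vpkudum and even/odd merge forms newly recognized above), the shuffle falls back to vperm with a byte-selector vector; on little-endian targets each selector is mirrored as 31 - b because vperm numbers bytes in big-endian order. A scalar model of the selector computation for 4-byte elements:

    #include <cstdio>

    void vpermByteMask(const int Elts[4], bool IsLittleEndian,
                       unsigned char Out[16]) {
      const unsigned BytesPerElement = 4;
      unsigned N = 0;
      for (unsigned i = 0; i < 4; ++i)
        for (unsigned j = 0; j != BytesPerElement; ++j) {
          unsigned B = Elts[i] * BytesPerElement + j;
          Out[N++] = IsLittleEndian ? 31 - B : B;
        }
    }

    int main() {
      const int Elts[4] = {4, 5, 6, 7}; // select the second input vector
      unsigned char Out[16];
      vpermByteMask(Elts, /*IsLittleEndian=*/false, Out);
      for (unsigned i = 0; i < 16; ++i)
        std::printf("%u ", Out[i]); // 16 17 ... 31
    }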
-static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, - bool &isDot) { +static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, + bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; @@ -6524,14 +7367,60 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 1; + } else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsd_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 1; + } else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtud_p: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 1; + } else + return false; + + break; + // VSX predicate comparisons use the same infrastructure + case Intrinsic::ppc_vsx_xvcmpeqdp_p: + case Intrinsic::ppc_vsx_xvcmpgedp_p: + case Intrinsic::ppc_vsx_xvcmpgtdp_p: + case Intrinsic::ppc_vsx_xvcmpeqsp_p: + case Intrinsic::ppc_vsx_xvcmpgesp_p: + case Intrinsic::ppc_vsx_xvcmpgtsp_p: + if (Subtarget.hasVSX()) { + switch (IntrinsicID) { + case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; + case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; + case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; + case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; + case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; + case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; + } + isDot = 1; + } + else + return false; + + break; // Normal Comparisons. 
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; @@ -6539,14 +7428,38 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 199; + isDot = 0; + } else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsd: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 967; + isDot = 0; + } else + return false; + + break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtud: + if (Subtarget.hasP8Altivec()) { + CompareOpc = 711; + isDot = 0; + } else + return false; + + break; } return true; } @@ -6560,14 +7473,14 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); int CompareOpc; bool isDot; - if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) + if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. if (!isDot) { SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(CompareOpc, MVT::i32)); + DAG.getConstant(CompareOpc, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); } @@ -6575,7 +7488,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Ops[] = { Op.getOperand(2), // LHS Op.getOperand(3), // RHS - DAG.getConstant(CompareOpc, MVT::i32) + DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); @@ -6607,15 +7520,15 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Shift the bit into the low position. Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, - DAG.getConstant(8-(3-BitNo), MVT::i32)); + DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); // Isolate the bit. Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); // If we are supposed to, toggle the bit. if (InvertBit) Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); return Flags; } @@ -6649,7 +7562,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. 
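For the dot-form intrinsics handled in LowerINTRINSIC_WO_CHAIN above, the four CR6 bits come back through the VCMPo glue, and the shift amount 8-(3-BitNo) drops the requested bit to position 0 while InvertBit implements the negated predicates. The same unpacking in plain integer arithmetic (the flag layout here is an illustrative assumption):

    #include <cstdio>

    unsigned unpackCR6(unsigned Flags, unsigned BitNo, bool InvertBit) {
      unsigned Bit = (Flags >> (8 - (3 - BitNo))) & 1u;
      return InvertBit ? Bit ^ 1u : Bit;
    }

    int main() {
      unsigned Flags = 1u << 5;                        // CR6 bit 0: "all true"
      std::printf("%u\n", unpackCR6(Flags, 0, false)); // 1
      std::printf("%u\n", unpackCR6(Flags, 0, true));  // 0 ("any false")
    }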
@@ -6661,6 +7574,305 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, false, 0); } +SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDNode *N = Op.getNode(); + + assert(N->getOperand(0).getValueType() == MVT::v4i1 && + "Unknown extract_vector_elt type"); + + SDValue Value = N->getOperand(0); + + // The first part of this is like the store lowering except that we don't + // need to track the chain. + + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to + // understand how to form the extending load. + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + // Now convert to an integer and store. + Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), + Value); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue StoreChain = DAG.getEntryNode(); + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32)); + Ops.push_back(Value); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + + // Extract the value requested. + unsigned Offset = 4*cast(N->getOperand(1))->getZExtValue(); + SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, + PtrInfo.getWithOffset(Offset), + false, false, false, 0); + + if (!Subtarget.useCRBits()) + return IntVal; + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); +} + +/// Lowering for QPX v4i1 loads +SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + LoadSDNode *LN = cast(Op.getNode()); + SDValue LoadChain = LN->getChain(); + SDValue BasePtr = LN->getBasePtr(); + + if (Op.getValueType() == MVT::v4f64 || + Op.getValueType() == MVT::v4f32) { + EVT MemVT = LN->getMemoryVT(); + unsigned Alignment = LN->getAlignment(); + + // If this load is properly aligned, then it is legal. 
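LowerEXTRACT_VECTOR_ELT above first remaps QPX's signed-float booleans, {-1.0, +1.0}, onto {0.0, 1.0} using a single fused multiply-add, relying on (V + 1.0) * 0.5 == 0.5*V + 0.5. The arithmetic, checked standalone:

    #include <cstdio>

    int main() {
      for (double V : {-1.0, 1.0})
        std::printf("%4.1f -> %.1f\n", V, 0.5 * V + 0.5); // -1 -> 0, 1 -> 1
    }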
+ if (Alignment >= MemVT.getStoreSize()) + return Op; + + EVT ScalarVT = Op.getValueType().getScalarType(), + ScalarMemVT = MemVT.getScalarType(); + unsigned Stride = ScalarMemVT.getStoreSize(); + + SmallVector Vals, LoadChains; + for (unsigned Idx = 0; Idx < 4; ++Idx) { + SDValue Load; + if (ScalarVT != ScalarMemVT) + Load = + DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, + BasePtr, + LN->getPointerInfo().getWithOffset(Idx*Stride), + ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), MinAlign(Alignment, Idx*Stride), + LN->getAAInfo()); + else + Load = + DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, + LN->getPointerInfo().getWithOffset(Idx*Stride), + LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), MinAlign(Alignment, Idx*Stride), + LN->getAAInfo()); + + if (Idx == 0 && LN->isIndexed()) { + assert(LN->getAddressingMode() == ISD::PRE_INC && + "Unknown addressing mode on vector load"); + Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), + LN->getAddressingMode()); + } + + Vals.push_back(Load); + LoadChains.push_back(Load.getValue(1)); + + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, dl, + BasePtr.getValueType())); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getValueType(), Vals); + + if (LN->isIndexed()) { + SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; + return DAG.getMergeValues(RetOps, dl); + } + + SDValue RetOps[] = { Value, TF }; + return DAG.getMergeValues(RetOps, dl); + } + + assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); + assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); + + // To lower v4i1 from a byte array, we load the byte elements of the + // vector and then reuse the BUILD_VECTOR logic. + + SmallVector VectElmts, VectElmtChains; + for (unsigned i = 0; i < 4; ++i) { + SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); + + VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD, + dl, MVT::i32, LoadChain, Idx, + LN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, + LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), + 1 /* alignment */, LN->getAAInfo())); + VectElmtChains.push_back(VectElmts[i].getValue(1)); + } + + LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts); + + SDValue RVals[] = { Value, LoadChain }; + return DAG.getMergeValues(RVals, dl); +} + +/// Lowering for QPX v4i1 stores +SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + StoreSDNode *SN = cast(Op.getNode()); + SDValue StoreChain = SN->getChain(); + SDValue BasePtr = SN->getBasePtr(); + SDValue Value = SN->getValue(); + + if (Value.getValueType() == MVT::v4f64 || + Value.getValueType() == MVT::v4f32) { + EVT MemVT = SN->getMemoryVT(); + unsigned Alignment = SN->getAlignment(); + + // If this store is properly aligned, then it is legal. 
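For v4f64/v4f32 loads that are less aligned than the full vector, LowerVectorLoad above splits the access into four scalar loads at the element stride and rebuilds the value with BUILD_VECTOR. A scalar model of the resulting memory access pattern, assuming no per-element extension is needed:

    #include <cstdint>
    #include <cstring>

    void loadV4F64Unaligned(const void *P, double Out[4]) {
      const std::uint8_t *Bytes = (const std::uint8_t *)P;
      for (unsigned Idx = 0; Idx < 4; ++Idx) // one scalar load per lane
        std::memcpy(&Out[Idx], Bytes + Idx * sizeof(double), sizeof(double));
    }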
+ if (Alignment >= MemVT.getStoreSize()) + return Op; + + EVT ScalarVT = Value.getValueType().getScalarType(), + ScalarMemVT = MemVT.getScalarType(); + unsigned Stride = ScalarMemVT.getStoreSize(); + + SmallVector Stores; + for (unsigned Idx = 0; Idx < 4; ++Idx) { + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, + DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Store; + if (ScalarVT != ScalarMemVT) + Store = + DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx*Stride), + ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(), + MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + else + Store = + DAG.getStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx*Stride), + SN->isVolatile(), SN->isNonTemporal(), + MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + + if (Idx == 0 && SN->isIndexed()) { + assert(SN->getAddressingMode() == ISD::PRE_INC && + "Unknown addressing mode on vector store"); + Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), + SN->getAddressingMode()); + } + + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, dl, + BasePtr.getValueType())); + Stores.push_back(Store); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + if (SN->isIndexed()) { + SDValue RetOps[] = { TF, Stores[0].getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + } + + return TF; + } + + assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); + assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); + + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to + // understand how to form the extending load. + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + // Now convert to an integer and store. + Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32), + Value); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32)); + Ops.push_back(Value); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + + // Move data into the byte array. 
+ SmallVector Loads, LoadChains; + for (unsigned i = 0; i < 4; ++i) { + unsigned Offset = 4*i; + SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx, + PtrInfo.getWithOffset(Offset), + false, false, false, 0)); + LoadChains.push_back(Loads[i].getValue(1)); + } + + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + + SmallVector Stores; + for (unsigned i = 0; i < 4; ++i) { + SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); + + Stores.push_back(DAG.getTruncStore( + StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(), + 1 /* alignment */, SN->getAAInfo())); + } + + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + return StoreChain; +} + SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { @@ -6757,6 +7969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); + case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); @@ -6783,6 +7996,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. @@ -6817,10 +8031,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); - EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); + EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), - N->getOperand(1)); + N->getOperand(1)); Results.push_back(NewInt); Results.push_back(NewInt.getValue(1)); @@ -6845,10 +8060,10 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getOperand(0).getValueType() == MVT::ppcf128); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, N->getOperand(0), - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode. SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); @@ -6860,6 +8075,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. 
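In the ppcf128 FP_TO_INT expansion above, the two f64 halves are summed with FADDRTZ precisely because a round-to-nearest add could round upward past the value being truncated, while round-toward-zero never rounds away from zero. The rounding-mode effect in plain C++ (assumes a compiler that honors fesetround for this expression, e.g. GCC with -frounding-math):

    #include <cfenv>
    #include <cstdio>

    int main() {
      double Hi = 9007199254740992.0; // 2^53; value = Hi + Lo
      double Lo = 1.25;               // Hi + Lo is not representable
      std::fesetround(FE_TONEAREST);
      std::printf("RNE: %.1f\n", Hi + Lo); // 9007199254740994.0, rounded up
      std::fesetround(FE_TOWARDZERO);
      std::printf("RTZ: %.1f\n", Hi + Lo); // 9007199254740992.0
      std::fesetround(FE_TONEAREST);
    }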
if (N->getOperand(0).getValueType() == MVT::ppcf128) return; @@ -6868,7 +8084,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, } } - //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// @@ -6876,7 +8091,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *Func = Intrinsic::getDeclaration(M, Id); - return Builder.CreateCall(Func); + return Builder.CreateCall(Func, {}); } // The mappings for emitLeading/TrailingFence is taken from @@ -6886,10 +8101,9 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, bool IsLoad) const { if (Ord == SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); - else if (isAtLeastRelease(Ord)) + if (isAtLeastRelease(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); - else - return nullptr; + return nullptr; } Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, @@ -6901,20 +8115,44 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. - else - return nullptr; + return nullptr; } MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - bool is64bit, unsigned BinOpcode) const { + unsigned AtomicSize, + unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch (AtomicSize) { + default: + llvm_unreachable("Unexpected size of atomic entity"); + case 1: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); + break; + case 2: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); + break; + case 4: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case 8: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } + const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); @@ -6932,7 +8170,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : - RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass + RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: @@ -6947,11 +8185,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); - BuildMI(BB, dl, TII->get(is64bit ? 
PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); @@ -6969,6 +8207,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { + // If we support part-word atomic mnemonics, just use them + if (Subtarget.hasPartwordAtomics()) + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the @@ -6980,8 +8222,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); @@ -7103,8 +8344,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; + MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); @@ -7116,7 +8356,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate @@ -7181,8 +8421,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned BaseReg; - if (MF->getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::Naked)) + if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; @@ -7205,8 +8444,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); - thisMBB->addSuccessor(mainMBB, /* weight */ 0); - thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); + thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 @@ -7255,7 +8494,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -7383,8 +8622,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // To "insert" these instructions we actually have to insert their // control-flow patterns. 
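EmitAtomicBinary above now picks among lbarx/lharx/lwarx/ldarx and the matching stcx. form by operand size, but the expansion remains the classic PowerPC load-reserve/store-conditional loop. The shape of the loop, and the C++ it implements:

    #include <atomic>

    // loop: l[bhwd]arx  dest, 0, ptr    ; load and reserve
    //       <binop>     tmp, incr, dest
    //       st[bhwd]cx. tmp, 0, ptr     ; store iff reservation still held
    //       bne-        loop            ; reservation lost: retry
    int fetchAdd(std::atomic<int> &A, int Incr) {
      return A.fetch_add(Incr, std::memory_order_relaxed);
    }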
const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); @@ -7408,15 +8646,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_QFRC || + MI->getOpcode() == PPC::SELECT_CC_QSRC || + MI->getOpcode() == PPC::SELECT_CC_QBRC || MI->getOpcode() == PPC::SELECT_CC_VRRC || MI->getOpcode() == PPC::SELECT_CC_VSFRC || + MI->getOpcode() == PPC::SELECT_CC_VSSRC || MI->getOpcode() == PPC::SELECT_CC_VSRC || MI->getOpcode() == PPC::SELECT_I4 || MI->getOpcode() == PPC::SELECT_I8 || MI->getOpcode() == PPC::SELECT_F4 || MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_QFRC || + MI->getOpcode() == PPC::SELECT_QSRC || + MI->getOpcode() == PPC::SELECT_QBRC || MI->getOpcode() == PPC::SELECT_VRRC || MI->getOpcode() == PPC::SELECT_VSFRC || + MI->getOpcode() == PPC::SELECT_VSSRC || MI->getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to @@ -7448,8 +8694,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_I8 || MI->getOpcode() == PPC::SELECT_F4 || MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_QFRC || + MI->getOpcode() == PPC::SELECT_QSRC || + MI->getOpcode() == PPC::SELECT_QBRC || MI->getOpcode() == PPC::SELECT_VRRC || MI->getOpcode() == PPC::SELECT_VSFRC || + MI->getOpcode() == PPC::SELECT_VSSRC || MI->getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); @@ -7484,7 +8734,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU - // cmpw crX,Rx,Rz # check if ‘old’=’new’ + // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... 
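The READ_TIME_BASE expansion sketched in the comment above exists because 32-bit mode reads the 64-bit timebase as two halves; rereading TBU detects TBL carrying into TBU between the two reads. The same protocol in C++, with reader callbacks standing in for the mfspr instructions:

    #include <cstdint>

    std::uint64_t readTimeBase32(std::uint32_t (*ReadTBU)(),
                                 std::uint32_t (*ReadTBL)()) {
      std::uint32_t Hi, Lo;
      do {
        Hi = ReadTBU();          // mfspr Rx,TBU
        Lo = ReadTBL();          // mfspr Ry,TB
      } while (ReadTBU() != Hi); // mfspr Rz,TBU ; cmpw ; bne readLoop
      return ((std::uint64_t)Hi << 32) | Lo;
    }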
@@ -7526,68 +8776,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); + BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::AND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::OR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); + BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) - BB = EmitAtomicBinary(MI, BB, false, 0); + BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) - BB = EmitAtomicBinary(MI, BB, true, 0); + BB = EmitAtomicBinary(MI, 
BB, 8, 0); else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch(MI->getOpcode()) { + default: + llvm_unreachable("Compare and swap of unknown size"); + case PPC::ATOMIC_CMP_SWAP_I8: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I16: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I32: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case PPC::ATOMIC_CMP_SWAP_I64: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); unsigned ptrB = MI->getOperand(2).getReg(); @@ -7613,18 +8891,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(loop1MBB); // loop1MBB: - // l[wd]arx dest, ptr + // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: - // st[wd]cx. newval, ptr + // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: - // st[wd]cx. dest, ptr + // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); @@ -7634,7 +8912,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); @@ -7643,7 +8921,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(exitMBB); BB = midMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); @@ -7843,6 +9121,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(isEQ ? 
PPC::CR0EQ : PPC::CR0GT); + } else if (MI->getOpcode() == PPC::TCHECK_RET) { + DebugLoc Dl = MI->getDebugLoc(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); + return BB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -7855,6 +9139,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// +static std::string getRecipOp(const char *Base, EVT VT) { + std::string RecipOp(Base); + if (VT.getScalarType() == MVT::f64) + RecipOp += "d"; + else + RecipOp += "f"; + + if (VT.isVector()) + RecipOp = "vec-" + RecipOp; + + return RecipOp; +} + SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, @@ -7863,14 +9160,15 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + (VT == MVT::v2f64 && Subtarget.hasVSX()) || + (VT == MVT::v4f32 && Subtarget.hasQPX()) || + (VT == MVT::v4f64 && Subtarget.hasQPX())) { + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("sqrt", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } @@ -7884,20 +9182,21 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + (VT == MVT::v2f64 && Subtarget.hasVSX()) || + (VT == MVT::v4f32 && Subtarget.hasQPX()) || + (VT == MVT::v4f64 && Subtarget.hasQPX())) { + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("div", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } -bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default @@ -7910,12 +9209,26 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: - return NumUsers > 2; + return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: - return NumUsers > 1; + return 2; + } +} + +// isConsecutiveLSLoc needs to work even if all adds have not yet been +// collapsed, and so we need to look through chains of them. +static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, + int64_t& Offset, SelectionDAG &DAG) { + if (DAG.isBaseWithConstantOffset(Loc)) { + Base = Loc.getOperand(0); + Offset += cast(Loc.getOperand(1))->getSExtValue(); + + // The base might itself be a base plus an offset, and if so, accumulate + // that as well. + getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } @@ -7938,16 +9251,18 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - // Handle X+C - if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && - cast(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + SDValue Base1 = Loc, Base2 = BaseLoc; + int64_t Offset1 = 0, Offset2 = 0; + getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); + getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); + if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; + Offset1 = 0; + Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) @@ -7970,6 +9285,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; + case Intrinsic::ppc_qpx_qvlfd: + case Intrinsic::ppc_qpx_qvlfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvlfs: + case Intrinsic::ppc_qpx_qvlfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvlfcd: + case Intrinsic::ppc_qpx_qvlfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvlfcs: + case Intrinsic::ppc_qpx_qvlfcsa: + VT = MVT::v2f32; + break; + case Intrinsic::ppc_qpx_qvlfiwa: + case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_vsx_lxvw4x: @@ -7996,6 +9329,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, EVT VT; 
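getRsqrtEstimate and getRecipEstimate above now only select the estimate opcode and read the refinement-step count from the reciprocal options; the DAG combiner performs the Newton-Raphson refinement itself. One reciprocal-square-root step, x1 = x0*(1.5 - 0.5*a*x0^2), roughly doubles the number of correct bits, which is why a 2^-5 estimate needs more steps than a 2^-14 one:

    #include <cstdio>

    int main() {
      double A = 2.0, X = 0.7;           // crude seed for 1/sqrt(2)
      for (int Step = 0; Step < 3; ++Step) {
        X = X * (1.5 - 0.5 * A * X * X); // one Newton-Raphson rsqrt step
        std::printf("step %d: %.12f\n", Step, X);
      }                                  // -> 0.707106781186...
    }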
switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; + case Intrinsic::ppc_qpx_qvstfd: + case Intrinsic::ppc_qpx_qvstfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvstfs: + case Intrinsic::ppc_qpx_qvstfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvstfcd: + case Intrinsic::ppc_qpx_qvstfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvstfcs: + case Intrinsic::ppc_qpx_qvstfcsa: + VT = MVT::v2f32; + break; + case Intrinsic::ppc_qpx_qvstfiw: + case Intrinsic::ppc_qpx_qvstfiwa: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_vsx_stxvw4x: @@ -8067,7 +9418,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { for (SmallSet::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); - + while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) @@ -8194,7 +9545,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, } // Visit all inputs, collect all binary operations (and, or, xor and - // select) that are all fed by extensions. + // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); @@ -8216,7 +9567,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(BinOp.getOperand(i))) { - Inputs.push_back(BinOp.getOperand(i)); + Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || @@ -8296,7 +9647,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, if (isa(Inputs[i])) continue; else - DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } // Replace all operations (these are all the same, but have a different @@ -8406,7 +9757,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and - // select) that are all fed by truncations. + // select) that are all fed by truncations. 
while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); @@ -8425,7 +9776,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { - Inputs.push_back(BinOp.getOperand(i)); + Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || @@ -8633,16 +9984,17 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), DAG.getConstant(APInt::getLowBitsSet( N->getValueSizeInBits(0), PromBits), - N->getValueType(0))); + dl, N->getValueType(0))); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); - EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = - DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy); - return DAG.getNode(ISD::SRA, dl, N->getValueType(0), - DAG.getNode(ISD::SHL, dl, N->getValueType(0), - N->getOperand(0), ShiftCst), ShiftCst); + DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); + return DAG.getNode( + ISD::SRA, dl, N->getValueType(0), + DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), + ShiftCst); } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, @@ -8694,6 +10046,9 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, if (Src.getValueType() == MVT::f32) { Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); DCI.AddToWorklist(Src.getNode()); + } else if (Src.getValueType() != MVT::f64) { + // Make sure that we don't pick up a ppc_fp128 source value. + return SDValue(); } unsigned FCTOp = @@ -8705,7 +10060,7 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { FP = DAG.getNode(ISD::FP_ROUND, dl, - MVT::f32, FP, DAG.getIntPtrConstant(0)); + MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); DCI.AddToWorklist(FP.getNode()); } @@ -8743,7 +10098,9 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, case ISD::INTRINSIC_W_CHAIN: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); - Base = Intrin->getBasePtr(); + // Similarly to the store case below, Intrin->getBasePtr() doesn't get + // us what we want. Get operand 2 instead. + Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } @@ -8821,16 +10178,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case PPCISD::SHL: - if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { - if (C->isNullValue()) // 0 << V -> 0. + if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); - } break; case PPCISD::SRL: - if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { - if (C->isNullValue()) // 0 >>u V -> 0. + if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. 
return N->getOperand(0); - } break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { @@ -8841,7 +10194,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: @@ -8924,14 +10277,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return expandVSXLoadForLE(N, DCI); } - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); - if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() && - // P8 and later hardware should just use LOAD. - !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || - VT == MVT::v4i32 || VT == MVT::v4f32) && + EVT MemVT = LD->getMemoryVT(); + Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); + Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); + unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); + if (LD->isUnindexed() && VT.isVector() && + ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && + // P8 and later hardware should just use LOAD. + !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || + VT == MVT::v4i32 || VT == MVT::v4f32)) || + (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) && + LD->getAlignment() >= ScalarABIAlignment)) && LD->getAlignment() < ABIAlignment) { - // This is a type-legal unaligned Altivec load. + // This is a type-legal unaligned Altivec or QPX load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); @@ -8960,10 +10319,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. - Intrinsic::ID Intr = (isLittleEndian ? - Intrinsic::ppc_altivec_lvsr : - Intrinsic::ppc_altivec_lvsl); - SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8); + Intrinsic::ID Intr, IntrLD, IntrPerm; + MVT PermCntlTy, PermTy, LDTy; + if (Subtarget.hasAltivec()) { + Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr : + Intrinsic::ppc_altivec_lvsl; + IntrLD = Intrinsic::ppc_altivec_lvx; + IntrPerm = Intrinsic::ppc_altivec_vperm; + PermCntlTy = MVT::v16i8; + PermTy = MVT::v4i32; + LDTy = MVT::v4i32; + } else { + Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld : + Intrinsic::ppc_qpx_qvlpcls; + IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd : + Intrinsic::ppc_qpx_qvlfs; + IntrPerm = Intrinsic::ppc_qpx_qvfperm; + PermCntlTy = MVT::v4f64; + PermTy = MVT::v4f64; + LDTy = MemVT.getSimpleVT(); + } + + SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered @@ -8973,17 +10350,17 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = MF.getMachineMemOperand(LD->getMemOperand(), - -LD->getMemoryVT().getStoreSize()+1, - 2*LD->getMemoryVT().getStoreSize()-1); + -(long)MemVT.getStoreSize()+1, + 2*MemVT.getStoreSize()-1); // Create the new base load. 
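The load combine above is the classic Altivec misaligned-load idiom, now shared with QPX via qvlpcld/qvlpcls: two aligned loads that straddle the requested address, combined by a permute under an lvsl/lvsr-generated control vector. A scalar model of the byte selection; note that, like the real sequence, it may read up to 15 bytes past the value, which the widened MMOs above account for:

    #include <cstdint>
    #include <cstring>

    void unalignedLoad16(const unsigned char *P, unsigned char Out[16]) {
      std::uintptr_t Addr = (std::uintptr_t)P;
      const unsigned char *Base =
          (const unsigned char *)(Addr & ~(std::uintptr_t)15);
      unsigned char Lo[16], Hi[16];
      std::memcpy(Lo, Base, 16);      // the "base" aligned load
      std::memcpy(Hi, Base + 16, 16); // the "extra" aligned load (over-reads)
      unsigned Sh = Addr & 15;        // the shift lvsl/lvsr encode
      for (unsigned i = 0; i < 16; ++i)
        Out[i] = Sh + i < 16 ? Lo[Sh + i] : Hi[Sh + i - 16];
    }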
- SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx, - getPointerTy()); + SDValue LDXIntID = + DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, - DAG.getVTList(MVT::v4i32, MVT::Other), - BaseLoadOps, MVT::v4i32, BaseMMO); + DAG.getVTList(PermTy, MVT::Other), + BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the @@ -9002,17 +10379,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (!findConsecutiveLoad(LD, DAG)) --IncValue; - SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); + SDValue Increment = + DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), - 1, 2*LD->getMemoryVT().getStoreSize()-1); + 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, - DAG.getVTList(MVT::v4i32, MVT::Other), - ExtraLoadOps, MVT::v4i32, ExtraMMO); + DAG.getVTList(PermTy, MVT::Other), + ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); @@ -9024,14 +10402,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // and ExtraLoad here. SDValue Perm; if (isLittleEndian) - Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else - Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); - if (VT != MVT::v4i32) - Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); + if (VT != PermTy) + Perm = Subtarget.hasAltivec() ? + DAG.getNode(ISD::BITCAST, dl, VT, Perm) : + DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX + DAG.getTargetConstant(1, dl, MVT::i64)); + // second argument is 1 because this rounding + // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. @@ -9042,15 +10425,21 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); + unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); - if (cast(N->getOperand(0))->getZExtValue() == Intr && - N->getOperand(1)->getOpcode() == ISD::ADD) { + if ((IID == Intr || + IID == Intrinsic::ppc_qpx_qvlpcld || + IID == Intrinsic::ppc_qpx_qvlpcls) && + N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); + int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ? 
+ 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */; + if (DAG.MaskedValueIsZero( Add->getOperand(1), - APInt::getAllOnesValue(4 /* 16 byte alignment */) + APInt::getAllOnesValue(Bits /* alignment */) .zext( Add.getValueType().getScalarType().getSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); @@ -9058,8 +10447,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast(UI->getOperand(0))->getZExtValue() == - Intr) { + cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. @@ -9068,6 +10456,27 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + if (isa(Add->getOperand(1))) { + SDNode *BasePtr = Add->getOperand(0).getNode(); + for (SDNode::use_iterator UI = BasePtr->use_begin(), + UE = BasePtr->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::ADD && + isa(UI->getOperand(1)) && + (cast(Add->getOperand(1))->getZExtValue() - + cast(UI->getOperand(1))->getZExtValue()) % + (1ULL << Bits) == 0) { + SDNode *OtherAdd = *UI; + for (SDNode::use_iterator VI = OtherAdd->use_begin(), + VE = OtherAdd->use_end(); VI != VE; ++VI) { + if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + cast(VI->getOperand(0))->getZExtValue() == IID) { + return SDValue(*VI, 0); + } + } + } + } + } } } @@ -9191,7 +10600,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); - + if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { @@ -9222,8 +10631,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && - !cast(LHS.getOperand(1))->getConstantIntValue()-> - isZero()) + !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && @@ -9252,7 +10660,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && - getAltivecCompareInfo(LHS, CompareOpc, isDot)) { + getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know @@ -9272,7 +10680,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare - DAG.getConstant(CompareOpc, MVT::i32) + DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); @@ -9296,7 +10704,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), - DAG.getConstant(CompOpc, MVT::i32), + DAG.getConstant(CompOpc, dl, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); } @@ -9324,14 +10732,14 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, bool IsNegPow2 = (-Divisor).isPowerOf2(); unsigned Lg2 = (IsNegPow2 ? 
@@ -9365,14 +10773,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     case Intrinsic::ppc_altivec_vcmpequb_p:
     case Intrinsic::ppc_altivec_vcmpequh_p:
     case Intrinsic::ppc_altivec_vcmpequw_p:
+    case Intrinsic::ppc_altivec_vcmpequd_p:
     case Intrinsic::ppc_altivec_vcmpgefp_p:
     case Intrinsic::ppc_altivec_vcmpgtfp_p:
     case Intrinsic::ppc_altivec_vcmpgtsb_p:
     case Intrinsic::ppc_altivec_vcmpgtsh_p:
     case Intrinsic::ppc_altivec_vcmpgtsw_p:
+    case Intrinsic::ppc_altivec_vcmpgtsd_p:
     case Intrinsic::ppc_altivec_vcmpgtub_p:
     case Intrinsic::ppc_altivec_vcmpgtuh_p:
     case Intrinsic::ppc_altivec_vcmpgtuw_p:
+    case Intrinsic::ppc_altivec_vcmpgtud_p:
       KnownZero = ~1U;  // All bits but the low one are known to be zero.
       break;
     }
@@ -9400,8 +10811,11 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
     // boundary so that the entire loop fits in one instruction-cache line.
     uint64_t LoopSize = 0;
     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
-      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
+      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
         LoopSize += TII->GetInstSizeInBytes(J);
+        if (LoopSize > 32)
+          break;
+      }
 
     if (LoopSize > 16 && LoopSize <= 32)
       return 5;
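The new early break in getPrefLoopAlignment is safe because the function only distinguishes "fits in a 32-byte line" from "does not": once the running size exceeds 32 bytes the result is settled, so scanning further instructions is wasted work. A standalone sketch of the bounded scan (hypothetical helper, not the patch itself):

    #include <cstdint>
    #include <vector>

    // Preferred log2 alignment for a loop, given its instruction sizes in
    // bytes: return 5 (2^5 = 32-byte alignment) when the loop spills past a
    // 16-byte line but still fits in one 32-byte instruction-cache line.
    unsigned prefLoopAlignLog2(const std::vector<unsigned> &InstSizes,
                               unsigned DefaultLog2) {
      uint64_t LoopSize = 0;
      for (unsigned Size : InstSizes) {
        LoopSize += Size;
        if (LoopSize > 32)
          break; // the answer can no longer change; stop early
      }
      return (LoopSize > 16 && LoopSize <= 32) ? 5 : DefaultLog2;
    }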
@@ -9416,7 +10830,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
 /// getConstraintType - Given a constraint, return the type of
 /// constraint it is for this target.
 PPCTargetLowering::ConstraintType
-PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+PPCTargetLowering::getConstraintType(StringRef Constraint) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     default: break;
@@ -9499,8 +10913,9 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
   return weight;
 }
 
-std::pair<unsigned, const TargetRegisterClass *>
-PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+std::pair<unsigned, const TargetRegisterClass *>
+PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                                StringRef Constraint,
                                                 MVT VT) const {
   if (Constraint.size() == 1) {
     // GCC RS6000 Constraint Letters
@@ -9518,23 +10933,36 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
         return std::make_pair(0U, &PPC::F4RCRegClass);
       if (VT == MVT::f64 || VT == MVT::i64)
         return std::make_pair(0U, &PPC::F8RCRegClass);
+      if (VT == MVT::v4f64 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QFRCRegClass);
+      if (VT == MVT::v4f32 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QSRCRegClass);
       break;
     case 'v':
-      return std::make_pair(0U, &PPC::VRRCRegClass);
+      if (VT == MVT::v4f64 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QFRCRegClass);
+      if (VT == MVT::v4f32 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QSRCRegClass);
+      if (Subtarget.hasAltivec())
+        return std::make_pair(0U, &PPC::VRRCRegClass);
     case 'y':   // crrc
       return std::make_pair(0U, &PPC::CRRCRegClass);
     }
-  } else if (Constraint == "wc") { // an individual CR bit.
+  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
+    // An individual CR bit.
     return std::make_pair(0U, &PPC::CRBITRCRegClass);
-  } else if (Constraint == "wa" || Constraint == "wd" ||
-             Constraint == "wf") {
+  } else if ((Constraint == "wa" || Constraint == "wd" ||
+              Constraint == "wf") && Subtarget.hasVSX()) {
     return std::make_pair(0U, &PPC::VSRCRegClass);
-  } else if (Constraint == "ws") {
-    return std::make_pair(0U, &PPC::VSFRCRegClass);
+  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+    if (VT == MVT::f32 && Subtarget.hasP8Vector())
+      return std::make_pair(0U, &PPC::VSSRCRegClass);
+    else
+      return std::make_pair(0U, &PPC::VSFRCRegClass);
   }
 
-  std::pair<unsigned, const TargetRegisterClass *> R =
-    TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  std::pair<unsigned, const TargetRegisterClass *> R =
+      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 
   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
@@ -9543,12 +10971,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
-      PPC::GPRCRegClass.contains(R.first)) {
-    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+      PPC::GPRCRegClass.contains(R.first))
     return std::make_pair(TRI->getMatchingSuperReg(R.first,
                             PPC::sub_32, &PPC::G8RCRegClass),
                           &PPC::G8RCRegClass);
-  }
 
   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
@@ -9559,7 +10985,6 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   return R;
 }
 
-
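These hooks are what GCC-style inline asm constraints resolve against when compiling for PowerPC: for instance, "f" requests a floating-point register (the F4RC/F8RC classes above) and "I" a signed 16-bit immediate, matching the letters handled here and in LowerAsmOperandForConstraint below. An illustrative user-level example, meaningful only when targeting PPC:

    // Compile for a PowerPC target to exercise the constraint handling above.
    double square(double x) {
      double r;
      asm("fmul %0, %1, %1" : "=f"(r) : "f"(x));   // "f" -> FP register class
      return r;
    }

    int add_imm(int x) {
      int r;
      asm("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(42)); // "I" -> signed 16-bit
      return r;
    }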
 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
 /// vector.  If it is invalid, don't add anything to Ops.
 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -9584,6 +11009,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'P': {
     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
     if (!CST) return; // Must be an immediate to match.
+    SDLoc dl(Op);
     int64_t Value = CST->getSExtValue();
     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                          // numbers are printed as such.
@@ -9591,35 +11017,35 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
     default: llvm_unreachable("Unknown constraint letter!");
     case 'I':  // "I" is a signed 16-bit constant.
       if (isInt<16>(Value))
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
       if (isShiftedUInt<16, 16>(Value))
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
       if (isShiftedInt<16, 16>(Value))
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
       if (isUInt<16>(Value))
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'M':  // "M" is a constant that is greater than 31.
       if (Value > 31)
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'N':  // "N" is a positive constant that is an exact power of two.
       if (Value > 0 && isPowerOf2_64(Value))
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'O':  // "O" is the constant zero.
       if (Value == 0)
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
       if (isInt<16>(-Value))
-        Result = DAG.getTargetConstant(Value, TCVT);
+        Result = DAG.getTargetConstant(Value, dl, TCVT);
       break;
     }
     break;
@@ -9637,9 +11063,12 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 
 // isLegalAddressingMode - Return true if the addressing mode represented
 // by AM is legal for this target, for a load/store of the specified type.
-bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                              Type *Ty) const {
-  // FIXME: PPC does not allow r+i addressing modes for vectors!
+bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                              const AddrMode &AM, Type *Ty,
+                                              unsigned AS) const {
+  // PPC does not allow r+i addressing modes for vectors!
+  if (Ty->isVectorTy() && AM.BaseOffs != 0)
+    return false;
 
   // PPC allows a sign-extended 16-bit immediate field.
   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
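The BaseOffs test encodes the reach of PPC's D-form memory instructions, whose displacement is a sign-extended 16-bit immediate; the new vector check refuses any register+immediate form at all. A standalone restatement of the predicate (hypothetical helper mirroring the bounds above):

    #include <cstdint>

    // True when a load/store base offset can fold into a PPC D-form
    // displacement, using the same bounds as isLegalAddressingMode above.
    // Vectors reject any nonzero offset, as in the patched check.
    bool isLegalPPCOffset(int64_t BaseOffs, bool IsVectorTy) {
      if (IsVectorTy && BaseOffs != 0)
        return false;
      return BaseOffs > -(1LL << 16) && BaseOffs < (1LL << 16) - 1;
    }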
@@ -9688,22 +11117,22 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   FuncInfo->setLRStoreRequired();
   bool isPPC64 = Subtarget.isPPC64();
+  auto PtrVT = getPointerTy(MF.getDataLayout());
 
   if (Depth > 0) {
     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
     SDValue Offset =
-      DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(),
+      DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                       isPPC64 ? MVT::i64 : MVT::i32);
-    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                                   FrameAddr, Offset),
+    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                        MachinePointerInfo(), false, false, false, 0);
   }
 
   // Just load the return address off the stack.
   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
+                     MachinePointerInfo(), false, false, false, 0);
 }
 
 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -9711,18 +11140,17 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
   SDLoc dl(Op);
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 
-  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  bool isPPC64 = PtrVT == MVT::i64;
-
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MFI->setFrameAddressIsTaken(true);
 
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+  bool isPPC64 = PtrVT == MVT::i64;
+
   // Naked functions never have a frame pointer, and so we use r1. For all
   // other functions, this decision must be delayed until during PEI.
   unsigned FrameReg;
-  if (MF.getFunction()->getAttributes().hasAttribute(
-        AttributeSet::FunctionIndex, Attribute::Naked))
+  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
   else
     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
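LowerRETURNADDR and LowerFRAMEADDR back the GCC builtins: a non-zero depth makes LowerRETURNADDR first lower FRAMEADDR to walk up the frame chain, then load the saved LR at ReturnSaveOffset from that frame. User code that reaches these paths:

    // These builtins lower to ISD::RETURNADDR / ISD::FRAMEADDR. Non-zero
    // depths walk the frame chain as in LowerRETURNADDR above and are
    // inherently best-effort (the argument must be a compile-time constant).
    void *caller_return_address() {
      return __builtin_return_address(1); // caller's saved LR
    }

    void *current_frame() {
      return __builtin_frame_address(0);  // this function's frame address
    }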
@@ -9738,8 +11166,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
 
 // FIXME? Maybe this could be a TableGen attribute on some registers and
 // this table could be generated automatically from RegInfo.
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
-                                              EVT VT) const {
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+                                              SelectionDAG &DAG) const {
   bool isPPC64 = Subtarget.isPPC64();
   bool isDarwinABI = Subtarget.isDarwinABI();
 
@@ -9771,6 +11199,12 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
+  case Intrinsic::ppc_qpx_qvlfd:
+  case Intrinsic::ppc_qpx_qvlfs:
+  case Intrinsic::ppc_qpx_qvlfcd:
+  case Intrinsic::ppc_qpx_qvlfcs:
+  case Intrinsic::ppc_qpx_qvlfiwa:
+  case Intrinsic::ppc_qpx_qvlfiwz:
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
   case Intrinsic::ppc_altivec_lvebx:
@@ -9792,6 +11226,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     case Intrinsic::ppc_vsx_lxvd2x:
       VT = MVT::v2f64;
       break;
+    case Intrinsic::ppc_qpx_qvlfd:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfs:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcd:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcs:
+      VT = MVT::v2f32;
+      break;
     default:
       VT = MVT::v4i32;
       break;
@@ -9808,6 +11254,47 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.writeMem = false;
     return true;
   }
+  case Intrinsic::ppc_qpx_qvlfda:
+  case Intrinsic::ppc_qpx_qvlfsa:
+  case Intrinsic::ppc_qpx_qvlfcda:
+  case Intrinsic::ppc_qpx_qvlfcsa:
+  case Intrinsic::ppc_qpx_qvlfiwaa:
+  case Intrinsic::ppc_qpx_qvlfiwza: {
+    EVT VT;
+    switch (Intrinsic) {
+    case Intrinsic::ppc_qpx_qvlfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcsa:
+      VT = MVT::v2f32;
+      break;
+    default:
+      VT = MVT::v4i32;
+      break;
+    }
+
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = VT;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.size = VT.getStoreSize();
+    Info.align = 1;
+    Info.vol = false;
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::ppc_qpx_qvstfd:
+  case Intrinsic::ppc_qpx_qvstfs:
+  case Intrinsic::ppc_qpx_qvstfcd:
+  case Intrinsic::ppc_qpx_qvstfcs:
+  case Intrinsic::ppc_qpx_qvstfiw:
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
   case Intrinsic::ppc_altivec_stvebx:
@@ -9829,6 +11316,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     case Intrinsic::ppc_vsx_stxvd2x:
       VT = MVT::v2f64;
       break;
+    case Intrinsic::ppc_qpx_qvstfd:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfs:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcd:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcs:
+      VT = MVT::v2f32;
+      break;
     default:
       VT = MVT::v4i32;
       break;
@@ -9845,6 +11344,41 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.writeMem = true;
     return true;
   }
+  case Intrinsic::ppc_qpx_qvstfda:
+  case Intrinsic::ppc_qpx_qvstfsa:
+  case Intrinsic::ppc_qpx_qvstfcda:
+  case Intrinsic::ppc_qpx_qvstfcsa:
+  case Intrinsic::ppc_qpx_qvstfiwa: {
+    EVT VT;
+    switch (Intrinsic) {
+    case Intrinsic::ppc_qpx_qvstfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcsa:
+      VT = MVT::v2f32;
+      break;
+    default:
+      VT = MVT::v4i32;
+      break;
+    }
+
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = VT;
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = 0;
+    Info.size = VT.getStoreSize();
+    Info.align = 1;
+    Info.vol = false;
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
   default:
     break;
   }
@@ -9868,11 +11402,29 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                            bool IsMemset, bool ZeroMemset,
                                            bool MemcpyStrSrc,
                                            MachineFunction &MF) const {
+  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+    const Function *F = MF.getFunction();
+    // When expanding a memset, require at least two QPX instructions to cover
+    // the cost of loading the value to be stored from the constant pool.
+    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+      return MVT::v4f64;
+    }
+
+    // We should use Altivec/VSX loads and stores when available. For unaligned
+    // addresses, unaligned VSX loads are only fast starting with the P8.
+    if (Subtarget.hasAltivec() && Size >= 16 &&
+        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+      return MVT::v4i32;
+  }
+
   if (Subtarget.isPPC64()) {
     return MVT::i64;
-  } else {
-    return MVT::i32;
   }
+
+  return MVT::i32;
 }
 
 /// \brief Returns true if it is beneficial to convert a load of a constant
@@ -9882,9 +11434,7 @@ bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
-  if (BitSize == 0 || BitSize > 64)
-    return false;
-  return true;
+  return !(BitSize == 0 || BitSize > 64);
 }
 
 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -10001,11 +11551,26 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
   return ScratchRegs;
 }
 
+unsigned PPCTargetLowering::getExceptionPointerRegister(
+    const Constant *PersonalityFn) const {
+  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
+}
+
+unsigned PPCTargetLowering::getExceptionSelectorRegister(
+    const Constant *PersonalityFn) const {
+  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
+}
+
 bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
   EVT VT , unsigned DefinedValues) const {
   if (VT == MVT::v2i64)
-    return false;
+    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
+
+  if (Subtarget.hasQPX()) {
+    if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
+      return true;
+  }
 
   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
 }
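getOptimalMemOpType picks the widest chunk type that memcpy/memset expansion will use, so the code above forms a priority ladder: 32-byte QPX vectors, then 16-byte Altivec/VSX vectors, then the native GPR width. A simplified standalone restatement (it omits the NoImplicitFloat and unaligned-VSX refinements shown in the patch):

    #include <cstdint>
    #include <string>

    // Chunk-type ladder used when expanding memcpy/memset, simplified from
    // the logic above. Alignment 0 means "unknown"; the real hook also
    // consults NoImplicitFloat and allows unaligned VSX accesses on newer
    // cores.
    std::string optimalMemOpType(uint64_t Size, unsigned DstAlign,
                                 unsigned SrcAlign, bool IsMemset,
                                 bool HasQPX, bool HasAltivec, bool IsPPC64) {
      if (HasQPX && Size >= 32 && (!IsMemset || Size >= 64) &&
          (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32))
        return "v4f64"; // one 32-byte QPX op per chunk
      if (HasAltivec && Size >= 16 &&
          (!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16))
        return "v4i32"; // 16-byte Altivec/VSX chunks
      return IsPPC64 ? "i64" : "i32";
    }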