X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FMips%2FMipsSEISelLowering.cpp;h=5167b6baf98db5aa5557d3b2485655c380c1d2d4;hb=7ba5769458c072a56fe2a9605e965d10229b54ce;hp=a4ff3f41eba588db86c1ad639be9862bbd755e74;hpb=f564ea31f005d44aafb09d8a4a155b0ef787d6c3;p=oota-llvm.git diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index a4ff3f41eba..5167b6baf98 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "MipsSEISelLowering.h" +#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -34,31 +35,28 @@ static cl::opt NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), "stores to their single precision " "counterparts")); -MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) - : MipsTargetLowering(TM) { +MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, + const MipsSubtarget &STI) + : MipsTargetLowering(TM, STI) { // Set up the register classes addRegisterClass(MVT::i32, &Mips::GPR32RegClass); - if (isGP64bit()) + if (Subtarget.isGP64bit()) addRegisterClass(MVT::i64, &Mips::GPR64RegClass); - if (Subtarget->hasDSP() || Subtarget->hasMSA()) { + if (Subtarget.hasDSP() || Subtarget.hasMSA()) { // Expand all truncating stores and extending loads. - unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; - - for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) { - for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1) - setTruncStoreAction((MVT::SimpleValueType)VT0, - (MVT::SimpleValueType)VT1, Expand); - - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand); + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + setTruncStoreAction(VT0, VT1, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } } } - if (Subtarget->hasDSP()) { + if (Subtarget.hasDSP()) { MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { @@ -82,10 +80,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::VSELECT); } - if (Subtarget->hasDSPR2()) + if (Subtarget.hasDSPR2()) setOperationAction(ISD::MUL, MVT::v2i16, Legal); - if (Subtarget->hasMSA()) { + if (Subtarget.hasMSA()) { addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); @@ -101,12 +99,12 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::XOR); } - if (!Subtarget->mipsSEUsesSoftFloat()) { + if (!Subtarget.useSoftFloat()) { addRegisterClass(MVT::f32, &Mips::FGR32RegClass); // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (Subtarget->isFP64bit()) + if (!Subtarget.isSingleFloat()) { + if (Subtarget.isFP64bit()) addRegisterClass(MVT::f64, &Mips::FGR64RegClass); else addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); @@ -118,14 +116,18 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MULHS, MVT::i32, Custom); setOperationAction(ISD::MULHU, MVT::i32, Custom); - if (Subtarget->hasCnMips()) + if (Subtarget.hasCnMips()) setOperationAction(ISD::MUL, MVT::i64, Legal); - else if (isGP64bit()) + else if (Subtarget.isGP64bit()) setOperationAction(ISD::MUL, MVT::i64, Custom); - if (isGP64bit()) { + if (Subtarget.isGP64bit()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); setOperationAction(ISD::MULHS, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); @@ -133,8 +135,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); - setOperationAction(ISD::SDIVREM, MVT::i64, Custom); - setOperationAction(ISD::UDIVREM, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); @@ -152,12 +152,93 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STORE, MVT::f64, Custom); } - computeRegisterProperties(); + if (Subtarget.hasMips32r6()) { + // MIPS32r6 replaces the accumulator-based multiplies with a three register + // instruction + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MULHS, MVT::i32, Legal); + setOperationAction(ISD::MULHU, MVT::i32, Legal); + + // MIPS32r6 replaces the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Legal); + setOperationAction(ISD::UDIV, MVT::i32, Legal); + setOperationAction(ISD::SREM, MVT::i32, Legal); + setOperationAction(ISD::UREM, MVT::i32, Legal); + + // MIPS32r6 replaces conditional moves with an equivalent that removes the + // need for three GPR read ports. + setOperationAction(ISD::SETCC, MVT::i32, Legal); + setOperationAction(ISD::SELECT, MVT::i32, Legal); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + + setOperationAction(ISD::SETCC, MVT::f32, Legal); + setOperationAction(ISD::SELECT, MVT::f32, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + + assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); + setOperationAction(ISD::SETCC, MVT::f64, Legal); + setOperationAction(ISD::SELECT, MVT::f64, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + setOperationAction(ISD::BRCOND, MVT::Other, Legal); + + // Floating point > and >= are supported via < and <= + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); + + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); + } + + if (Subtarget.hasMips64r6()) { + // MIPS64r6 replaces the accumulator-based multiplies with a three register + // instruction + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MUL, MVT::i64, Legal); + setOperationAction(ISD::MULHS, MVT::i64, Legal); + setOperationAction(ISD::MULHU, MVT::i64, Legal); + + // MIPS32r6 replaces the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Legal); + setOperationAction(ISD::UDIV, MVT::i64, Legal); + setOperationAction(ISD::SREM, MVT::i64, Legal); + setOperationAction(ISD::UREM, MVT::i64, Legal); + + // MIPS64r6 replaces conditional moves with an equivalent that removes the + // need for three GPR read ports. + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SELECT, MVT::i64, Legal); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + } + + computeRegisterProperties(Subtarget.getRegisterInfo()); } const MipsTargetLowering * -llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { - return new MipsSETargetLowering(TM); +llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, + const MipsSubtarget &STI) { + return new MipsSETargetLowering(TM, STI); +} + +const TargetRegisterClass * +MipsSETargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; + + return TargetLowering::getRepRegClassFor(VT); } // Enable MSA support for the given integer type and Register class. @@ -249,11 +330,22 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { } bool -MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + if (Subtarget.systemSupportsUnalignedAccess()) { + // MIPS32r6/MIPS64r6 is required to support unaligned access. It's + // implementation defined whether this is handled by hardware, software, or + // a hybrid of the two but it's expected that most implementations will + // handle the majority of cases in hardware. + if (Fast) + *Fast = true; + return true; + } + switch (SVT) { case MVT::i64: case MVT::i32: @@ -435,12 +527,12 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMADD(N, &DAG)) + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -455,8 +547,8 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, // - Removes redundant zero extensions performed by an ISD::AND. static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (!Subtarget->hasMSA()) + const MipsSubtarget &Subtarget) { + if (!Subtarget.hasMSA()) return SDValue(); SDValue Op0 = N->getOperand(0); @@ -487,10 +579,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || Log2 == ExtendTySize) { SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; - DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, - Op0->getVTList(), - ArrayRef(Ops, Op0->getNumOperands())); - return Op0; + return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), + Op0->getVTList(), + makeArrayRef(Ops, Op0->getNumOperands())); } } @@ -571,8 +662,8 @@ static bool isBitwiseInverse(SDValue N, SDValue OfNode) { // vector type. static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (!Subtarget->hasMSA()) + const MipsSubtarget &Subtarget) { + if (!Subtarget.hasMSA()) return SDValue(); EVT Ty = N->getValueType(0); @@ -588,7 +679,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, SDValue Op0Op1 = Op0->getOperand(1); SDValue Op1Op0 = Op1->getOperand(0); SDValue Op1Op1 = Op1->getOperand(1); - bool IsLittleEndian = !Subtarget->isLittle(); + bool IsLittleEndian = !Subtarget.isLittle(); SDValue IfSet, IfClr, Cond; bool IsConstantMask = false; @@ -691,11 +782,11 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && selectMSUB(N, &DAG)) return SDValue(N, 0); @@ -709,7 +800,7 @@ static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT, // Return 0. if (C == 0) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); // Return x. if (C == 1) @@ -718,7 +809,7 @@ static SDValue genConstMult(SDValue X, uint64_t C, SDLoc DL, EVT VT, // If c is power of 2, return (shl x, log2(c)). if (isPowerOf2_64(C)) return DAG.getNode(ISD::SHL, DL, VT, X, - DAG.getConstant(Log2_64(C), ShiftTy)); + DAG.getConstant(Log2_64(C), DL, ShiftTy)); unsigned Log2Ceil = Log2_64_Ceil(C); uint64_t Floor = 1LL << Log2_64(C); @@ -747,15 +838,16 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) if (!VT.isVector()) - return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), - VT, TL->getScalarShiftAmountTy(VT), DAG); + return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT, + TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), + DAG); return SDValue(N, 0); } static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { // See if this is a vector splat immediate node. APInt SplatValue, SplatUndef; unsigned SplatBitSize; @@ -763,23 +855,24 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); BuildVectorSDNode *BV = dyn_cast(N->getOperand(1)); - if (!Subtarget->hasDSP()) + if (!Subtarget.hasDSP()) return SDValue(); if (!BV || !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, - EltSize, !Subtarget->isLittle()) || + EltSize, !Subtarget.isLittle()) || (SplatBitSize != EltSize) || (SplatValue.getZExtValue() >= EltSize)) return SDValue(); - return DAG.getNode(Opc, SDLoc(N), Ty, N->getOperand(0), - DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); + SDLoc DL(N); + return DAG.getNode(Opc, DL, Ty, N->getOperand(0), + DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); } static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) @@ -802,10 +895,10 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, // used for DSPr2. static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); - if (Subtarget->hasMSA()) { + if (Subtarget.hasMSA()) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -832,15 +925,14 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TotalBits <= 32)) { SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), Op0Op0->getOperand(2) }; - DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, - Op0Op0->getVTList(), - ArrayRef(Ops, Op0Op0->getNumOperands())); - return Op0Op0; + return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), + Op0Op0->getVTList(), + makeArrayRef(Ops, Op0Op0->getNumOperands())); } } } - if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) return SDValue(); return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); @@ -849,10 +941,10 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); - if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) + if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) return SDValue(); return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); @@ -946,10 +1038,10 @@ static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { } static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); - if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { + if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { // Try the following combines: // (xor (or $a, $b), (build_vector allones)) // (xor (or $a, $b), (bitcast (build_vector allones))) @@ -1054,6 +1146,24 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return emitINSERT_FW(MI, BB); case Mips::INSERT_FD_PSEUDO: return emitINSERT_FD(MI, BB); + case Mips::INSERT_B_VIDX_PSEUDO: + case Mips::INSERT_B_VIDX64_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 1, false); + case Mips::INSERT_H_VIDX_PSEUDO: + case Mips::INSERT_H_VIDX64_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 2, false); + case Mips::INSERT_W_VIDX_PSEUDO: + case Mips::INSERT_W_VIDX64_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 4, false); + case Mips::INSERT_D_VIDX_PSEUDO: + case Mips::INSERT_D_VIDX64_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 8, false); + case Mips::INSERT_FW_VIDX_PSEUDO: + case Mips::INSERT_FW_VIDX64_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 4, true); + case Mips::INSERT_FD_VIDX_PSEUDO: + case Mips::INSERT_FD_VIDX64_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 8, true); case Mips::FILL_FW_PSEUDO: return emitFILL_FW(MI, BB); case Mips::FILL_FD_PSEUDO: @@ -1065,15 +1175,14 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } } -bool MipsSETargetLowering:: -isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const { +bool MipsSETargetLowering::isEligibleForTailCallOptimization( + const CCState &CCInfo, unsigned NextStackOffset, + const MipsFunctionInfo &FI) const { if (!EnableMipsTailCalls) return false; // Return false if either the callee or caller has a byval argument. - if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) return false; // Return true if the callee's argument area is no larger than the @@ -1085,10 +1194,12 @@ void MipsSETargetLowering:: getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, - CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, + SDValue Chain) const { Ops.push_back(Callee); MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, - InternalLinkage, CLI, Callee, Chain); + InternalLinkage, IsCallReloc, CLI, Callee, + Chain); } SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { @@ -1109,13 +1220,13 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { Nd.getAlignment()); // i32 load from higher address. - Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); + Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), Nd.isVolatile(), Nd.isNonTemporal(), Nd.isInvariant(), std::min(Nd.getAlignment(), 4U)); - if (!Subtarget->isLittle()) + if (!Subtarget.isLittle()) std::swap(Lo, Hi); SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); @@ -1134,28 +1245,31 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); EVT PtrVT = Ptr.getValueType(); SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, - Val, DAG.getConstant(0, MVT::i32)); + Val, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, - Val, DAG.getConstant(1, MVT::i32)); + Val, DAG.getConstant(1, DL, MVT::i32)); - if (!Subtarget->isLittle()) + if (!Subtarget.isLittle()) std::swap(Lo, Hi); // i32 store to lower address. Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), - Nd.getTBAAInfo()); + Nd.getAAInfo()); // i32 store to higher address. - Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); + Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), Nd.isVolatile(), Nd.isNonTemporal(), - std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo()); + std::min(Nd.getAlignment(), 4U), Nd.getAAInfo()); } SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { + // MIPS32r6/MIPS64r6 removed accumulator based multiplies. + assert(!Subtarget.hasMips32r6()); + EVT Ty = Op.getOperand(0).getValueType(); SDLoc DL(Op); SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, @@ -1177,9 +1291,9 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) { SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); } @@ -1275,7 +1389,7 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { SDValue LaneB = Op->getOperand(2); if (ResVecTy == MVT::v2i64) { - LaneA = DAG.getConstant(0, MVT::i32); + LaneA = DAG.getConstant(0, DL, MVT::i32); ViaVecTy = MVT::v4i32; } else LaneA = LaneB; @@ -1284,7 +1398,7 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, - ArrayRef(Ops, ViaVecTy.getVectorNumElements())); + makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); if (ViaVecTy != ResVecTy) Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); @@ -1293,7 +1407,8 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { } static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { - return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0)); + return DAG.getConstant(Op->getConstantOperandVal(ImmOp), SDLoc(Op), + Op->getValueType(0)); } static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, @@ -1309,7 +1424,7 @@ static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, - DAG.getConstant(32, MVT::i32)); + DAG.getConstant(32, DL, MVT::i32)); SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); } @@ -1324,7 +1439,7 @@ static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, SplatValueA, SplatValueB, SplatValueA, SplatValueB }; SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, - ArrayRef(Ops, ViaVecTy.getVectorNumElements())); + makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); if (VecTy != ViaVecTy) Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); @@ -1345,8 +1460,9 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, if (ConstantSDNode *CImm = dyn_cast(Imm)) { APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); - SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32); - SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32); + SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, + MVT::i32); + SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); if (BigEndian) std::swap(BitImmLoOp, BitImmHiOp); @@ -1368,8 +1484,8 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); - Exp2Imm = - DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm); + Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), + Exp2Imm); } return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); @@ -1378,7 +1494,7 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); SDLoc DL(Op); - SDValue One = DAG.getConstant(1, ResTy); + SDValue One = DAG.getConstant(1, DL, ResTy); SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), @@ -1390,7 +1506,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1) << cast(Op->getOperand(2))->getAPIntValue(); - SDValue BitMask = DAG.getConstant(~BitImm, ResTy); + SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); } @@ -1472,8 +1588,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), Op->getConstantOperandVal(3)); return DAG.getNode(ISD::VSELECT, DL, VecTy, - DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), - Op->getOperand(1)); + DAG.getConstant(Mask, DL, VecTy, true), + Op->getOperand(2), Op->getOperand(1)); } case Intrinsic::mips_binsri_b: case Intrinsic::mips_binsri_h: @@ -1485,8 +1601,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), Op->getConstantOperandVal(3)); return DAG.getNode(ISD::VSELECT, DL, VecTy, - DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), - Op->getOperand(1)); + DAG.getConstant(Mask, DL, VecTy, true), + Op->getOperand(2), Op->getOperand(1)); } case Intrinsic::mips_bmnz_v: return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), @@ -1507,7 +1623,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_bneg_w: case Intrinsic::mips_bneg_d: { EVT VecTy = Op->getValueType(0); - SDValue One = DAG.getConstant(1, VecTy); + SDValue One = DAG.getConstant(1, DL, VecTy); return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, @@ -1518,7 +1634,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_bnegi_w: case Intrinsic::mips_bnegi_d: return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), - !Subtarget->isLittle()); + !Subtarget.isLittle()); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -1543,7 +1659,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_bset_w: case Intrinsic::mips_bset_d: { EVT VecTy = Op->getValueType(0); - SDValue One = DAG.getConstant(1, VecTy); + SDValue One = DAG.getConstant(1, DL, VecTy); return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, @@ -1554,7 +1670,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_bseti_w: case Intrinsic::mips_bseti_d: return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), - !Subtarget->isLittle()); + !Subtarget.isLittle()); case Intrinsic::mips_bz_b: case Intrinsic::mips_bz_h: case Intrinsic::mips_bz_w: @@ -1629,7 +1745,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_copy_s_w: return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); case Intrinsic::mips_copy_s_d: - if (hasMips64()) + if (Subtarget.hasMips64()) // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); else { @@ -1644,7 +1760,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_copy_u_w: return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); case Intrinsic::mips_copy_u_d: - if (hasMips64()) + if (Subtarget.hasMips64()) // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); else { @@ -1670,9 +1786,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_fadd_w: - case Intrinsic::mips_fadd_d: + case Intrinsic::mips_fadd_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away case Intrinsic::mips_fceq_w: case Intrinsic::mips_fceq_d: @@ -1715,9 +1833,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUNE); case Intrinsic::mips_fdiv_w: - case Intrinsic::mips_fdiv_d: + case Intrinsic::mips_fdiv_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_ffint_u_w: case Intrinsic::mips_ffint_u_d: return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), @@ -1730,11 +1850,9 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fill_h: case Intrinsic::mips_fill_w: case Intrinsic::mips_fill_d: { - SmallVector Ops; EVT ResTy = Op->getValueType(0); - - for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) - Ops.push_back(Op->getOperand(1)); + SmallVector Ops(ResTy.getVectorNumElements(), + Op->getOperand(1)); // If ResTy is v2i64 then the type legalizer will break this node down into // an equivalent v4i32. @@ -1742,6 +1860,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::mips_fexp2_w: case Intrinsic::mips_fexp2_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. EVT ResTy = Op->getValueType(0); return DAG.getNode( ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), @@ -1755,11 +1874,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_fmul_w: - case Intrinsic::mips_fmul_d: + case Intrinsic::mips_fmul_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_fmsub_w: case Intrinsic::mips_fmsub_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. EVT ResTy = Op->getValueType(0); return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, @@ -1772,9 +1894,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsqrt_d: return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fsub_w: - case Intrinsic::mips_fsub_d: + case Intrinsic::mips_fsub_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_ftrunc_u_w: case Intrinsic::mips_ftrunc_u_d: return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), @@ -1819,7 +1943,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_insve_d: return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); case Intrinsic::mips_ldi_b: case Intrinsic::mips_ldi_h: case Intrinsic::mips_ldi_w: @@ -2185,9 +2309,9 @@ lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { static bool isConstantOrUndef(const SDValue Op) { if (Op->getOpcode() == ISD::UNDEF) return true; - if (dyn_cast(Op)) + if (isa(Op)) return true; - if (dyn_cast(Op)) + if (isa(Op)) return true; return false; } @@ -2221,12 +2345,12 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; - if (!Subtarget->hasMSA() || !ResTy.is128BitVector()) + if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) return SDValue(); if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget->isLittle()) && SplatBitSize <= 64) { + !Subtarget.isLittle()) && SplatBitSize <= 64) { // We can only cope with 8, 16, 32, or 64-bit elements if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && SplatBitSize != 64) @@ -2259,7 +2383,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, } // SelectionDAG::getConstant will promote SplatValue appropriately. - SDValue Result = DAG.getConstant(SplatValue, ViaVecTy); + SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); // Bitcast to the type we originally wanted if (ViaVecTy != ResTy) @@ -2281,7 +2405,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, for (unsigned i = 0; i < NumElts; ++i) { Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Node->getOperand(i), - DAG.getConstant(i, MVT::i32)); + DAG.getConstant(i, DL, MVT::i32)); } return Vector; } @@ -2297,7 +2421,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, // It is therefore possible to lower into SHF when the mask takes the form: // // When undef's appear they are treated as if they were whatever value is -// necessary in order to fit the above form. +// necessary in order to fit the above forms. // // For example: // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, @@ -2351,181 +2475,331 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, Imm |= Idx & 0x3; } - return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy, - DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); + SDLoc DL(Op); + return DAG.getNode(MipsISD::SHF, DL, ResTy, + DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); +} + +/// Determine whether a range fits a regular pattern of values. +/// This function accounts for the possibility of jumping over the End iterator. +template +static bool +fitsRegularPattern(typename SmallVectorImpl::const_iterator Begin, + unsigned CheckStride, + typename SmallVectorImpl::const_iterator End, + ValType ExpectedIndex, unsigned ExpectedIndexStride) { + auto &I = Begin; + + while (I != End) { + if (*I != -1 && *I != ExpectedIndex) + return false; + ExpectedIndex += ExpectedIndexStride; + + // Incrementing past End is undefined behaviour so we must increment one + // step at a time and check for End at each step. + for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) + ; // Empty loop body. + } + return true; +} + +// Determine whether VECTOR_SHUFFLE is a SPLATI. +// +// It is a SPLATI when the mask is: +// +// where x is any valid index. +// +// When undef's appear in the mask they are treated as if they were whatever +// value is necessary in order to fit the above form. +static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert((Indices.size() % 2) == 0); + + int SplatIndex = -1; + for (const auto &V : Indices) { + if (V != -1) { + SplatIndex = V; + break; + } + } + + return fitsRegularPattern(Indices.begin(), 1, Indices.end(), SplatIndex, + 0); } // Lower VECTOR_SHUFFLE into ILVEV (if possible). // // ILVEV interleaves the even elements from each vector. // -// It is possible to lower into ILVEV when the mask takes the form: -// <0, n, 2, n+2, 4, n+4, ...> +// It is possible to lower into ILVEV when the mask consists of two of the +// following forms interleaved: +// <0, 2, 4, ...> +// // where n is the number of elements in the vector. +// For example: +// <0, 0, 2, 2, 4, 4, ...> +// <0, n, 2, n+2, 4, n+4, ...> // // When undef's appear in the mask they are treated as if they were whatever -// value is necessary in order to fit the above form. +// value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, SmallVector Indices, SelectionDAG &DAG) { - assert ((Indices.size() % 2) == 0); - int WsIdx = 0; - int WtIdx = ResTy.getVectorNumElements(); + assert((Indices.size() % 2) == 0); + + SDValue Wt; + SDValue Ws; + const auto &Begin = Indices.begin(); + const auto &End = Indices.end(); + + // Check even elements are taken from the even elements of one half or the + // other and pick an operand accordingly. + if (fitsRegularPattern(Begin, 2, End, 0, 2)) + Wt = Op->getOperand(0); + else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 2)) + Wt = Op->getOperand(1); + else + return SDValue(); - for (unsigned i = 0; i < Indices.size(); i += 2) { - if (Indices[i] != -1 && Indices[i] != WsIdx) - return SDValue(); - if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) - return SDValue(); - WsIdx += 2; - WtIdx += 2; - } + // Check odd elements are taken from the even elements of one half or the + // other and pick an operand accordingly. + if (fitsRegularPattern(Begin + 1, 2, End, 0, 2)) + Ws = Op->getOperand(0); + else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 2)) + Ws = Op->getOperand(1); + else + return SDValue(); - return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0), - Op->getOperand(1)); + return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); } // Lower VECTOR_SHUFFLE into ILVOD (if possible). // // ILVOD interleaves the odd elements from each vector. // -// It is possible to lower into ILVOD when the mask takes the form: -// <1, n+1, 3, n+3, 5, n+5, ...> +// It is possible to lower into ILVOD when the mask consists of two of the +// following forms interleaved: +// <1, 3, 5, ...> +// // where n is the number of elements in the vector. +// For example: +// <1, 1, 3, 3, 5, 5, ...> +// <1, n+1, 3, n+3, 5, n+5, ...> // // When undef's appear in the mask they are treated as if they were whatever -// value is necessary in order to fit the above form. +// value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, SmallVector Indices, SelectionDAG &DAG) { - assert ((Indices.size() % 2) == 0); - int WsIdx = 1; - int WtIdx = ResTy.getVectorNumElements() + 1; + assert((Indices.size() % 2) == 0); + + SDValue Wt; + SDValue Ws; + const auto &Begin = Indices.begin(); + const auto &End = Indices.end(); + + // Check even elements are taken from the odd elements of one half or the + // other and pick an operand accordingly. + if (fitsRegularPattern(Begin, 2, End, 1, 2)) + Wt = Op->getOperand(0); + else if (fitsRegularPattern(Begin, 2, End, Indices.size() + 1, 2)) + Wt = Op->getOperand(1); + else + return SDValue(); - for (unsigned i = 0; i < Indices.size(); i += 2) { - if (Indices[i] != -1 && Indices[i] != WsIdx) - return SDValue(); - if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) - return SDValue(); - WsIdx += 2; - WtIdx += 2; - } + // Check odd elements are taken from the odd elements of one half or the + // other and pick an operand accordingly. + if (fitsRegularPattern(Begin + 1, 2, End, 1, 2)) + Ws = Op->getOperand(0); + else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + 1, 2)) + Ws = Op->getOperand(1); + else + return SDValue(); - return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0), - Op->getOperand(1)); + return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws); } -// Lower VECTOR_SHUFFLE into ILVL (if possible). +// Lower VECTOR_SHUFFLE into ILVR (if possible). // -// ILVL interleaves consecutive elements from the left half of each vector. +// ILVR interleaves consecutive elements from the right (lowest-indexed) half of +// each vector. // -// It is possible to lower into ILVL when the mask takes the form: -// <0, n, 1, n+1, 2, n+2, ...> +// It is possible to lower into ILVR when the mask consists of two of the +// following forms interleaved: +// <0, 1, 2, ...> +// // where n is the number of elements in the vector. +// For example: +// <0, 0, 1, 1, 2, 2, ...> +// <0, n, 1, n+1, 2, n+2, ...> // // When undef's appear in the mask they are treated as if they were whatever -// value is necessary in order to fit the above form. -static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, +// value is necessary in order to fit the above forms. +static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, SmallVector Indices, SelectionDAG &DAG) { - assert ((Indices.size() % 2) == 0); - int WsIdx = 0; - int WtIdx = ResTy.getVectorNumElements(); + assert((Indices.size() % 2) == 0); + + SDValue Wt; + SDValue Ws; + const auto &Begin = Indices.begin(); + const auto &End = Indices.end(); + + // Check even elements are taken from the right (lowest-indexed) elements of + // one half or the other and pick an operand accordingly. + if (fitsRegularPattern(Begin, 2, End, 0, 1)) + Wt = Op->getOperand(0); + else if (fitsRegularPattern(Begin, 2, End, Indices.size(), 1)) + Wt = Op->getOperand(1); + else + return SDValue(); - for (unsigned i = 0; i < Indices.size(); i += 2) { - if (Indices[i] != -1 && Indices[i] != WsIdx) - return SDValue(); - if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) - return SDValue(); - WsIdx ++; - WtIdx ++; - } + // Check odd elements are taken from the right (lowest-indexed) elements of + // one half or the other and pick an operand accordingly. + if (fitsRegularPattern(Begin + 1, 2, End, 0, 1)) + Ws = Op->getOperand(0); + else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size(), 1)) + Ws = Op->getOperand(1); + else + return SDValue(); - return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0), - Op->getOperand(1)); + return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); } -// Lower VECTOR_SHUFFLE into ILVR (if possible). +// Lower VECTOR_SHUFFLE into ILVL (if possible). // -// ILVR interleaves consecutive elements from the right half of each vector. +// ILVL interleaves consecutive elements from the left (highest-indexed) half +// of each vector. // -// It is possible to lower into ILVR when the mask takes the form: -// +// It is possible to lower into ILVL when the mask consists of two of the +// following forms interleaved: +// +// // where n is the number of elements in the vector and x is half n. +// For example: +// +// // // When undef's appear in the mask they are treated as if they were whatever -// value is necessary in order to fit the above form. -static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, +// value is necessary in order to fit the above forms. +static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, SmallVector Indices, SelectionDAG &DAG) { - assert ((Indices.size() % 2) == 0); - unsigned NumElts = ResTy.getVectorNumElements(); - int WsIdx = NumElts / 2; - int WtIdx = NumElts + NumElts / 2; + assert((Indices.size() % 2) == 0); + + unsigned HalfSize = Indices.size() / 2; + SDValue Wt; + SDValue Ws; + const auto &Begin = Indices.begin(); + const auto &End = Indices.end(); + + // Check even elements are taken from the left (highest-indexed) elements of + // one half or the other and pick an operand accordingly. + if (fitsRegularPattern(Begin, 2, End, HalfSize, 1)) + Wt = Op->getOperand(0); + else if (fitsRegularPattern(Begin, 2, End, Indices.size() + HalfSize, 1)) + Wt = Op->getOperand(1); + else + return SDValue(); - for (unsigned i = 0; i < Indices.size(); i += 2) { - if (Indices[i] != -1 && Indices[i] != WsIdx) - return SDValue(); - if (Indices[i+1] != -1 && Indices[i+1] != WtIdx) - return SDValue(); - WsIdx ++; - WtIdx ++; - } + // Check odd elements are taken from the left (highest-indexed) elements of + // one half or the other and pick an operand accordingly. + if (fitsRegularPattern(Begin + 1, 2, End, HalfSize, 1)) + Ws = Op->getOperand(0); + else if (fitsRegularPattern(Begin + 1, 2, End, Indices.size() + HalfSize, + 1)) + Ws = Op->getOperand(1); + else + return SDValue(); - return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0), - Op->getOperand(1)); + return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); } // Lower VECTOR_SHUFFLE into PCKEV (if possible). // // PCKEV copies the even elements of each vector into the result vector. // -// It is possible to lower into PCKEV when the mask takes the form: -// <0, 2, 4, ..., n, n+2, n+4, ...> +// It is possible to lower into PCKEV when the mask consists of two of the +// following forms concatenated: +// <0, 2, 4, ...> +// // where n is the number of elements in the vector. +// For example: +// <0, 2, 4, ..., 0, 2, 4, ...> +// <0, 2, 4, ..., n, n+2, n+4, ...> // // When undef's appear in the mask they are treated as if they were whatever -// value is necessary in order to fit the above form. +// value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, SmallVector Indices, SelectionDAG &DAG) { - assert ((Indices.size() % 2) == 0); - int Idx = 0; + assert((Indices.size() % 2) == 0); + + SDValue Wt; + SDValue Ws; + const auto &Begin = Indices.begin(); + const auto &Mid = Indices.begin() + Indices.size() / 2; + const auto &End = Indices.end(); + + if (fitsRegularPattern(Begin, 1, Mid, 0, 2)) + Wt = Op->getOperand(0); + else if (fitsRegularPattern(Begin, 1, Mid, Indices.size(), 2)) + Wt = Op->getOperand(1); + else + return SDValue(); - for (unsigned i = 0; i < Indices.size(); ++i) { - if (Indices[i] != -1 && Indices[i] != Idx) - return SDValue(); - Idx += 2; - } + if (fitsRegularPattern(Mid, 1, End, 0, 2)) + Ws = Op->getOperand(0); + else if (fitsRegularPattern(Mid, 1, End, Indices.size(), 2)) + Ws = Op->getOperand(1); + else + return SDValue(); - return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0), - Op->getOperand(1)); + return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); } // Lower VECTOR_SHUFFLE into PCKOD (if possible). // // PCKOD copies the odd elements of each vector into the result vector. // -// It is possible to lower into PCKOD when the mask takes the form: -// <1, 3, 5, ..., n+1, n+3, n+5, ...> +// It is possible to lower into PCKOD when the mask consists of two of the +// following forms concatenated: +// <1, 3, 5, ...> +// // where n is the number of elements in the vector. +// For example: +// <1, 3, 5, ..., 1, 3, 5, ...> +// <1, 3, 5, ..., n+1, n+3, n+5, ...> // // When undef's appear in the mask they are treated as if they were whatever -// value is necessary in order to fit the above form. +// value is necessary in order to fit the above forms. static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, SmallVector Indices, SelectionDAG &DAG) { - assert ((Indices.size() % 2) == 0); - int Idx = 1; + assert((Indices.size() % 2) == 0); + + SDValue Wt; + SDValue Ws; + const auto &Begin = Indices.begin(); + const auto &Mid = Indices.begin() + Indices.size() / 2; + const auto &End = Indices.end(); + + if (fitsRegularPattern(Begin, 1, Mid, 1, 2)) + Wt = Op->getOperand(0); + else if (fitsRegularPattern(Begin, 1, Mid, Indices.size() + 1, 2)) + Wt = Op->getOperand(1); + else + return SDValue(); - for (unsigned i = 0; i < Indices.size(); ++i) { - if (Indices[i] != -1 && Indices[i] != Idx) - return SDValue(); - Idx += 2; - } + if (fitsRegularPattern(Mid, 1, End, 1, 2)) + Ws = Op->getOperand(0); + else if (fitsRegularPattern(Mid, 1, End, Indices.size() + 1, 2)) + Ws = Op->getOperand(1); + else + return SDValue(); - return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), - Op->getOperand(1)); + return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); } // Lower VECTOR_SHUFFLE into VSHF. @@ -2561,7 +2835,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, for (SmallVector::iterator I = Indices.begin(); I != Indices.end(); ++I) - Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); + Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy)); SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops); @@ -2601,10 +2875,11 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, for (int i = 0; i < ResTyNumElts; ++i) Indices.push_back(Node->getMaskElt(i)); - SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); - if (Result.getNode()) - return Result; - Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG); + // splati.[bhwd] is preferable to the others but is matched from + // MipsISD::VSHF. + if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); + SDValue Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG); if (Result.getNode()) return Result; Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG); @@ -2620,6 +2895,9 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, if (Result.getNode()) return Result; Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); if (Result.getNode()) return Result; return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); @@ -2641,7 +2919,7 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $vr0 = phi($vr2, $fbb, $vr1, $tbb) MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const TargetRegisterClass *RC = &Mips::GPR32RegClass; DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -2706,7 +2984,7 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, // $rd = phi($rd1, $fbb, $rd2, $tbb) MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const TargetRegisterClass *RC = &Mips::GPR32RegClass; DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -2767,17 +3045,28 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, // for lane 1 because it would require FR=0 mode which isn't supported by MSA. MachineBasicBlock * MipsSETargetLowering:: emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Fd = MI->getOperand(0).getReg(); unsigned Ws = MI->getOperand(1).getReg(); unsigned Lane = MI->getOperand(2).getImm(); - if (Lane == 0) - BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo); - else { - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + if (Lane == 0) { + unsigned Wt = Ws; + if (!Subtarget.useOddSPReg()) { + // We must copy to an even-numbered MSA register so that the + // single-precision sub-register is also guaranteed to be even-numbered. + Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws); + } + + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); + } else { + unsigned Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : + &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); @@ -2799,9 +3088,9 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ // valid because FR=1 mode which is the only supported mode in MSA. MachineBasicBlock * MipsSETargetLowering:: emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{ - assert(Subtarget->isFP64bit()); + assert(Subtarget.isFP64bit()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); unsigned Fd = MI->getOperand(0).getReg(); unsigned Ws = MI->getOperand(1).getReg(); @@ -2830,14 +3119,16 @@ emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{ MachineBasicBlock * MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); unsigned Wd_in = MI->getOperand(1).getReg(); unsigned Lane = MI->getOperand(2).getImm(); unsigned Fs = MI->getOperand(3).getReg(); - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + unsigned Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : + &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) @@ -2862,9 +3153,9 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, MachineBasicBlock *BB) const { - assert(Subtarget->isFP64bit()); + assert(Subtarget.isFP64bit()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2887,6 +3178,132 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, return BB; } +// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. +// +// For integer: +// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) +// => +// (SLL $lanetmp1, $lane, +// (SUBREG_TO_REG $wt, $fs, ) +// (SLL $lanetmp1, $lane, getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Wd = MI->getOperand(0).getReg(); + unsigned SrcVecReg = MI->getOperand(1).getReg(); + unsigned LaneReg = MI->getOperand(2).getReg(); + unsigned SrcValReg = MI->getOperand(3).getReg(); + + const TargetRegisterClass *VecRC = nullptr; + const TargetRegisterClass *GPRRC = + Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; + unsigned EltLog2Size; + unsigned InsertOp = 0; + unsigned InsveOp = 0; + switch (EltSizeInBytes) { + default: + llvm_unreachable("Unexpected size"); + case 1: + EltLog2Size = 0; + InsertOp = Mips::INSERT_B; + InsveOp = Mips::INSVE_B; + VecRC = &Mips::MSA128BRegClass; + break; + case 2: + EltLog2Size = 1; + InsertOp = Mips::INSERT_H; + InsveOp = Mips::INSVE_H; + VecRC = &Mips::MSA128HRegClass; + break; + case 4: + EltLog2Size = 2; + InsertOp = Mips::INSERT_W; + InsveOp = Mips::INSVE_W; + VecRC = &Mips::MSA128WRegClass; + break; + case 8: + EltLog2Size = 3; + InsertOp = Mips::INSERT_D; + InsveOp = Mips::INSVE_D; + VecRC = &Mips::MSA128DRegClass; + break; + } + + if (IsFP) { + unsigned Wt = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) + .addImm(0) + .addReg(SrcValReg) + .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); + SrcValReg = Wt; + } + + // Convert the lane index into a byte index + if (EltSizeInBytes != 1) { + unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SLL), LaneTmp1) + .addReg(LaneReg) + .addImm(EltLog2Size); + LaneReg = LaneTmp1; + } + + // Rotate bytes around so that the desired lane is element zero + unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) + .addReg(SrcVecReg) + .addReg(SrcVecReg) + .addReg(LaneReg); + + unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); + if (IsFP) { + // Use insve.df to insert to element zero + BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) + .addReg(WdTmp1) + .addImm(0) + .addReg(SrcValReg) + .addImm(0); + } else { + // Use insert.df to insert to element zero + BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) + .addReg(WdTmp1) + .addReg(SrcValReg) + .addImm(0); + } + + // Rotate elements the rest of the way for a full rotation. + // sld.df inteprets $rt modulo the number of columns so we only need to negate + // the lane index to do this. + unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), + LaneTmp2) + .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) + .addReg(LaneReg); + BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) + .addReg(WdTmp2) + .addReg(WdTmp2) + .addReg(LaneTmp2); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + // Emit the FILL_FW pseudo instruction. // // fill_fw_pseudo $wd, $fs @@ -2897,7 +3314,7 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2926,9 +3343,9 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, MachineBasicBlock *BB) const { - assert(Subtarget->isFP64bit()); + assert(Subtarget.isFP64bit()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2956,7 +3373,7 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128WRegClass; unsigned Ws1 = RegInfo.createVirtualRegister(RC); @@ -2985,7 +3402,7 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128DRegClass; unsigned Ws1 = RegInfo.createVirtualRegister(RC);