X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FMips%2FMipsSEISelLowering.cpp;h=50a41d3afa50bedcdf2925c2d2c788001413072f;hb=ba29378fdc8aa184c0d7fa08022790b7ec7d8acf;hp=fb722515c878653328109c6cbf56dea704e84973;hpb=ad341d48f0fc131d1c31a0c824736e70c34e0476;p=oota-llvm.git diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index fb722515c87..50a41d3afa5 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -10,6 +10,7 @@ // Subclass of MipsTargetLowering specialized for mips32/64. // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-isel" #include "MipsSEISelLowering.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" @@ -17,6 +18,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; @@ -25,17 +28,35 @@ static cl::opt EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, cl::desc("MIPS: Enable tail calls."), cl::init(false)); +static cl::opt NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), + cl::desc("Expand double precision loads and " + "stores to their single precision " + "counterparts")); + MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) : MipsTargetLowering(TM) { // Set up the register classes - - clearRegisterClasses(); - addRegisterClass(MVT::i32, &Mips::GPR32RegClass); if (HasMips64) addRegisterClass(MVT::i64, &Mips::GPR64RegClass); + if (Subtarget->hasDSP() || Subtarget->hasMSA()) { + // Expand all truncating stores and extending loads. 
+ unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + + for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) { + for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1) + setTruncStoreAction((MVT::SimpleValueType)VT0, + (MVT::SimpleValueType)VT1, Expand); + + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand); + } + } + if (Subtarget->hasDSP()) { MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; @@ -53,20 +74,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BITCAST, VecTys[i], Legal); } - // Expand all truncating stores and extending loads. - unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; - - for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) { - for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1) - setTruncStoreAction((MVT::SimpleValueType)VT0, - (MVT::SimpleValueType)VT1, Expand); - - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand); - } - setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); @@ -78,16 +85,22 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v2i16, Legal); if (Subtarget->hasMSA()) { - addMSAType(MVT::v16i8); - addMSAType(MVT::v8i16); - addMSAType(MVT::v4i32); - addMSAType(MVT::v2i64); - addMSAType(MVT::v8f16); - addMSAType(MVT::v4f32); - addMSAType(MVT::v2f64); + addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); + addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); + addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); + addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); + 
addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); + addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); + addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); } - if (!TM.Options.UseSoftFloat) { + if (!Subtarget->mipsSEUsesSoftFloat()) { addRegisterClass(MVT::f32, &Mips::FGR32RegClass); // When dealing with single precision only, use libcalls @@ -125,6 +138,15 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + + if (NoDPLoadStore) { + setOperationAction(ISD::LOAD, MVT::f64, Custom); + setOperationAction(ISD::STORE, MVT::f64, Custom); + } + computeRegisterProperties(); } @@ -133,9 +155,59 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { return new MipsSETargetLowering(TM); } -void -MipsSETargetLowering::addMSAType(MVT::SimpleValueType Ty) { - addRegisterClass(Ty, &Mips::MSA128RegClass); +// Enable MSA support for the given integer type and Register class. +void MipsSETargetLowering:: +addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SHL, Ty, Legal); + setOperationAction(ISD::SRA, Ty, Legal); + setOperationAction(ISD::SRL, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); + + if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Legal); + setOperationAction(ISD::FP_TO_UINT, Ty, Legal); + setOperationAction(ISD::SINT_TO_FP, Ty, Legal); + setOperationAction(ISD::UINT_TO_FP, Ty, Legal); + } + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); +} + +// Enable MSA support for the given floating-point type and Register class. +void MipsSETargetLowering:: +addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); // Expand all builtin opcodes. 
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) @@ -144,6 +216,31 @@ MipsSETargetLowering::addMSAType(MVT::SimpleValueType Ty) { setOperationAction(ISD::LOAD, Ty, Legal); setOperationAction(ISD::STORE, Ty, Legal); setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + + if (Ty != MVT::v8f16) { + setOperationAction(ISD::FABS, Ty, Legal); + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FEXP2, Ty, Legal); + setOperationAction(ISD::FLOG2, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + } } bool @@ -164,6 +261,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { SDValue MipsSETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch(Op.getOpcode()) { + case ISD::LOAD: return lowerLOAD(Op, DAG); + case ISD::STORE: return lowerSTORE(Op, DAG); case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); @@ -174,6 +273,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: 
return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); } return MipsTargetLowering::LowerOperation(Op, DAG); @@ -226,7 +329,7 @@ static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { SDLoc DL(ADDENode); // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, + SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, ADDCNode->getOperand(1), ADDENode->getOperand(1)); @@ -240,15 +343,11 @@ static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { // replace uses of adde and addc here if (!SDValue(ADDCNode, 0).use_empty()) { - SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); - SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd, - LoIdx); + SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); } if (!SDValue(ADDENode, 0).use_empty()) { - SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); - SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd, - HiIdx); + SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); } @@ -302,7 +401,7 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { SDLoc DL(SUBENode); // Initialize accumulator. 
- SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, + SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, SUBCNode->getOperand(0), SUBENode->getOperand(0)); @@ -316,15 +415,11 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { // replace uses of sube and subc here if (!SDValue(SUBCNode, 0).use_empty()) { - SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); - SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, - LoIdx); + SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); } if (!SDValue(SUBENode, 0).use_empty()) { - SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); - SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, - HiIdx); + SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); } @@ -344,6 +439,244 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT +// +// Performs the following transformations: +// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::AND. +// - Removes redundant zero extensions performed by an ISD::AND. 
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (!Subtarget->hasMSA()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Op0Opcode = Op0->getOpcode(); + + // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) + // where $d + 1 == 2^n and n == 32 + // or $d + 1 == 2^n and n <= 32 and ZExt + // -> (MipsVExtractZExt $a, $b, $c) + if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || + Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { + ConstantSDNode *Mask = dyn_cast(Op1); + + if (!Mask) + return SDValue(); + + int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); + + if (Log2IfPositive <= 0) + return SDValue(); // Mask+1 is not a power of 2 + + SDValue Op0Op2 = Op0->getOperand(2); + EVT ExtendTy = cast(Op0Op2)->getVT(); + unsigned ExtendTySize = ExtendTy.getSizeInBits(); + unsigned Log2 = Log2IfPositive; + + if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || + Log2 == ExtendTySize) { + SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; + DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, + Op0->getVTList(), Ops, Op0->getNumOperands()); + return Op0; + } + } + + return SDValue(); +} + +// Determine if the specified node is a constant vector splat. +// +// Returns true and sets Imm if: +// * N is a ISD::BUILD_VECTOR representing a constant splat +// +// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The +// differences are that it assumes the MSA has already been checked and the +// arbitrary requirement for a maximum of 32-bit integers isn't applied (and +// must not be in order for binsri.d to be selectable). 
+static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { + BuildVectorSDNode *Node = dyn_cast(N.getNode()); + + if (Node == NULL) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, !IsLittleEndian)) + return false; + + Imm = SplatValue; + + return true; +} + +// Test whether the given node is an all-ones build_vector. +static bool isVectorAllOnes(SDValue N) { + // Look through bitcasts. Endianness doesn't matter because we are looking + // for an all-ones value. + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + BuildVectorSDNode *BVN = dyn_cast(N); + + if (!BVN) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + // Endianness doesn't matter in this context because we are looking for + // an all-ones value. + if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) + return SplatValue.isAllOnesValue(); + + return false; +} + +// Test whether N is the bitwise inverse of OfNode. +static bool isBitwiseInverse(SDValue N, SDValue OfNode) { + if (N->getOpcode() != ISD::XOR) + return false; + + if (isVectorAllOnes(N->getOperand(0))) + return N->getOperand(1) == OfNode; + + if (isVectorAllOnes(N->getOperand(1))) + return N->getOperand(0) == OfNode; + + return false; +} + +// Perform combines where ISD::OR is the root node. +// +// Performs the following transformations: +// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) +// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit +// vector type. 
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (!Subtarget->hasMSA()) + return SDValue(); + + EVT Ty = N->getValueType(0); + + if (!Ty.is128BitVector()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + SDValue Op1Op0 = Op1->getOperand(0); + SDValue Op1Op1 = Op1->getOperand(1); + bool IsLittleEndian = !Subtarget->isLittle(); + + SDValue IfSet, IfClr, Cond; + bool IsConstantMask = false; + APInt Mask, InvMask; + + // If Op0Op0 is an appropriate mask, try to find it's inverse in either + // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while + // looking. + // IfClr will be set if we find a valid match. + if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { + Cond = Op0Op0; + IfSet = Op0Op1; + + if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op0; + + IsConstantMask = true; + } + + // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same + // thing again using this mask. + // IfClr will be set if we find a valid match. + if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { + Cond = Op0Op1; + IfSet = Op0Op0; + + if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask) + IfClr = Op1Op0; + + IsConstantMask = true; + } + + // If IfClr is not yet set, try looking for a non-constant match. + // IfClr will be set if we find a valid match amongst the eight + // possibilities. 
+ if (!IfClr.getNode()) { + if (isBitwiseInverse(Op0Op0, Op1Op0)) { + Cond = Op1Op0; + IfSet = Op1Op1; + IfClr = Op0Op1; + } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { + Cond = Op1Op0; + IfSet = Op1Op1; + IfClr = Op0Op0; + } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { + Cond = Op1Op1; + IfSet = Op1Op0; + IfClr = Op0Op1; + } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { + Cond = Op1Op1; + IfSet = Op1Op0; + IfClr = Op0Op0; + } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { + Cond = Op0Op0; + IfSet = Op0Op1; + IfClr = Op1Op1; + } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { + Cond = Op0Op0; + IfSet = Op0Op1; + IfClr = Op1Op0; + } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { + Cond = Op0Op1; + IfSet = Op0Op0; + IfClr = Op1Op1; + } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { + Cond = Op0Op1; + IfSet = Op0Op0; + IfClr = Op1Op0; + } + } + + // At this point, IfClr will be set if we have a valid match. + if (!IfClr.getNode()) + return SDValue(); + + assert(Cond.getNode() && IfSet.getNode()); + + // Fold degenerate cases. + if (IsConstantMask) { + if (Mask.isAllOnesValue()) + return IfSet; + else if (Mask == 0) + return IfClr; + } + + // Transform the DAG into an equivalent VSELECT. + return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet); + } + + return SDValue(); +} + static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { @@ -440,11 +773,57 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); } +// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold +// constant splats into MipsISD::SHRA_DSP for DSPr2. +// +// Performs the following transformations: +// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::SRA and ISD::SHL nodes. 
+// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL +// sequence. +// +// See performDSPShiftCombine for more information about the transformation +// used for DSPr2. static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { EVT Ty = N->getValueType(0); + if (Subtarget->hasMSA()) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) + // where $d + sizeof($c) == 32 + // or $d + sizeof($c) <= 32 and SExt + // -> (MipsVExtractSExt $a, $b, $c) + if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { + SDValue Op0Op0 = Op0->getOperand(0); + ConstantSDNode *ShAmount = dyn_cast(Op1); + + if (!ShAmount) + return SDValue(); + + if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && + Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) + return SDValue(); + + EVT ExtendTy = cast(Op0Op0->getOperand(2))->getVT(); + unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); + + if (TotalBits == 32 || + (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && + TotalBits <= 32)) { + SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), + Op0Op0->getOperand(2) }; + DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, + Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands()); + return Op0Op0; + } + } + } + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) return SDValue(); @@ -497,17 +876,84 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { EVT Ty = N->getValueType(0); - if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) - return SDValue(); + if (Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) + // 
(vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) + // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) + // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but + // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the + // legalizer. + SDValue Op0 = N->getOperand(0); + + if (Op0->getOpcode() != ISD::SETCC) + return SDValue(); + + ISD::CondCode CondCode = cast(Op0->getOperand(2))->get(); + bool Signed; + + if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) + Signed = true; + else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) + Signed = false; + else + return SDValue(); + + SDValue Op1 = N->getOperand(1); + SDValue Op2 = N->getOperand(2); + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + + if (Op1 == Op0Op0 && Op2 == Op0Op1) + return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), + Ty, Op1, Op2); + else if (Op1 == Op0Op1 && Op2 == Op0Op0) + return DAG.getNode(Signed ? 
MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), + Ty, Op1, Op2); + } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), + N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); + } - SDValue SetCC = N->getOperand(0); + return SDValue(); +} - if (SetCC.getOpcode() != MipsISD::SETCC_DSP) - return SDValue(); +static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (xor (or $a, $b), (build_vector allones)) + // (xor (or $a, $b), (bitcast (build_vector allones))) + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue NotOp; + + if (ISD::isBuildVectorAllOnes(Op0.getNode())) + NotOp = Op1; + else if (ISD::isBuildVectorAllOnes(Op1.getNode())) + NotOp = Op0; + else + return SDValue(); + + if (NotOp->getOpcode() == ISD::OR) + return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), + NotOp->getOperand(1)); + } - return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, - SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1), - N->getOperand(2), SetCC.getOperand(2)); + return SDValue(); } SDValue @@ -518,6 +964,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { case ISD::ADDE: return performADDECombine(N, DAG, DCI, Subtarget); + case ISD::AND: + Val = performANDCombine(N, DAG, DCI, Subtarget); + break; + case ISD::OR: + Val = performORCombine(N, DAG, DCI, Subtarget); + break; case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -530,14 +982,22 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { return performSRLCombine(N, DAG, 
DCI, Subtarget); case ISD::VSELECT: return performVSELECTCombine(N, DAG); - case ISD::SETCC: { + case ISD::XOR: + Val = performXORCombine(N, DAG, Subtarget); + break; + case ISD::SETCC: Val = performSETCCCombine(N, DAG); break; } - } - if (Val.getNode()) + if (Val.getNode()) { + DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; + N->printrWithDepth(dbgs(), &DAG); + dbgs() << "\n=> \n"; + Val.getNode()->printrWithDepth(dbgs(), &DAG); + dbgs() << "\n"); return Val; + } return MipsTargetLowering::PerformDAGCombine(N, DCI); } @@ -550,6 +1010,42 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); case Mips::BPOSGE32_PSEUDO: return emitBPOSGE32(MI, BB); + case Mips::SNZ_B_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B); + case Mips::SNZ_H_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H); + case Mips::SNZ_W_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W); + case Mips::SNZ_D_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D); + case Mips::SNZ_V_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V); + case Mips::SZ_B_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BZ_B); + case Mips::SZ_H_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BZ_H); + case Mips::SZ_W_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BZ_W); + case Mips::SZ_D_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BZ_D); + case Mips::SZ_V_PSEUDO: + return emitMSACBranchPseudo(MI, BB, Mips::BZ_V); + case Mips::COPY_FW_PSEUDO: + return emitCOPY_FW(MI, BB); + case Mips::COPY_FD_PSEUDO: + return emitCOPY_FD(MI, BB); + case Mips::INSERT_FW_PSEUDO: + return emitINSERT_FW(MI, BB); + case Mips::INSERT_FD_PSEUDO: + return emitINSERT_FD(MI, BB); + case Mips::FILL_FW_PSEUDO: + return emitFILL_FW(MI, BB); + case Mips::FILL_FD_PSEUDO: + return emitFILL_FD(MI, BB); + case Mips::FEXP2_W_1_PSEUDO: + return emitFEXP2_W_1(MI, BB); + case Mips::FEXP2_D_1_PSEUDO: + return emitFEXP2_D_1(MI, BB); 
} } @@ -586,6 +1082,68 @@ getOpndList(SmallVectorImpl &Ops, InternalLinkage, CLI, Callee, Chain); } +SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { + LoadSDNode &Nd = *cast(Op); + + if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) + return MipsTargetLowering::lowerLOAD(Op, DAG); + + // Replace a double precision load with two i32 loads and a buildpair64. + SDLoc DL(Op); + SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); + EVT PtrVT = Ptr.getValueType(); + + // i32 load from lower address. + SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, + MachinePointerInfo(), Nd.isVolatile(), + Nd.isNonTemporal(), Nd.isInvariant(), + Nd.getAlignment()); + + // i32 load from higher address. + Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); + SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr, + MachinePointerInfo(), Nd.isVolatile(), + Nd.isNonTemporal(), Nd.isInvariant(), + std::min(Nd.getAlignment(), 4U)); + + if (!Subtarget->isLittle()) + std::swap(Lo, Hi); + + SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); + SDValue Ops[2] = {BP, Hi.getValue(1)}; + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode &Nd = *cast(Op); + + if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) + return MipsTargetLowering::lowerSTORE(Op, DAG); + + // Replace a double precision store with two extractelement64s and i32 stores. + SDLoc DL(Op); + SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); + EVT PtrVT = Ptr.getValueType(); + SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Val, DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Val, DAG.getConstant(1, MVT::i32)); + + if (!Subtarget->isLittle()) + std::swap(Lo, Hi); + + // i32 store to lower address. 
+ Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), + Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), + Nd.getTBAAInfo()); + + // i32 store to higher address. + Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); + return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), + Nd.isVolatile(), Nd.isNonTemporal(), + std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo()); +} + SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { @@ -596,11 +1154,9 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, SDValue Lo, Hi; if (HasLo) - Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, - DAG.getConstant(Mips::sub_lo, MVT::i32)); + Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); if (HasHi) - Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, - DAG.getConstant(Mips::sub_hi, MVT::i32)); + Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); if (!HasLo || !HasHi) return HasLo ? 
Lo : Hi; @@ -615,14 +1171,12 @@ static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) { DAG.getConstant(0, MVT::i32)); SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); + return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); } static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) { - SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, - DAG.getConstant(Mips::sub_lo, MVT::i32)); - SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, - DAG.getConstant(Mips::sub_hi, MVT::i32)); + SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); + SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); } @@ -686,8 +1240,58 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return DAG.getMergeValues(Vals, 2, DL); } +// Lower an MSA copy intrinsic into the specified SelectionDAG node +static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + SDLoc DL(Op); + SDValue Vec = Op->getOperand(1); + SDValue Idx = Op->getOperand(2); + EVT ResTy = Op->getValueType(0); + EVT EltTy = Vec->getValueType(0).getVectorElementType(); + + SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, + DAG.getValueType(EltTy)); + + return Result; +} + +static SDValue +lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) { + EVT ViaVecTy = ResTy; + SmallVector Ops; + SDValue ImmHiOp; + + if (ViaVecTy == MVT::v2i64) { + ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp, + DAG.getConstant(31, MVT::i32)); + for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) { + Ops.push_back(ImmHiOp); + Ops.push_back(ImmOp); + } + ViaVecTy = MVT::v4i32; + } else { + for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) + Ops.push_back(ImmOp); + } + + SDValue Result = 
DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0], + Ops.size()); + + if (ResTy != ViaVecTy) + Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + + return Result; +} + +static SDValue +lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { + return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0), + Op->getOperand(ImmOp), DAG); +} + SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + switch (cast(Op->getOperand(0))->getZExtValue()) { default: return SDValue(); @@ -723,12 +1327,566 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerDSPIntr(Op, DAG, MipsISD::MSub); case Intrinsic::mips_msubu: return lowerDSPIntr(Op, DAG, MipsISD::MSubu); + case Intrinsic::mips_addv_b: + case Intrinsic::mips_addv_h: + case Intrinsic::mips_addv_w: + case Intrinsic::mips_addv_d: + return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_addvi_b: + case Intrinsic::mips_addvi_h: + case Intrinsic::mips_addvi_w: + case Intrinsic::mips_addvi_d: + return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_and_v: + return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_andi_b: + return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_binsli_b: + case Intrinsic::mips_binsli_h: + case Intrinsic::mips_binsli_w: + case Intrinsic::mips_binsli_d: { + EVT VecTy = Op->getValueType(0); + EVT EltTy = VecTy.getVectorElementType(); + APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), + Op->getConstantOperandVal(3)); + return DAG.getNode(ISD::VSELECT, DL, VecTy, + DAG.getConstant(Mask, VecTy, true), Op->getOperand(1), + Op->getOperand(2)); + } + case Intrinsic::mips_binsri_b: + case Intrinsic::mips_binsri_h: + case 
Intrinsic::mips_binsri_w: + case Intrinsic::mips_binsri_d: { + EVT VecTy = Op->getValueType(0); + EVT EltTy = VecTy.getVectorElementType(); + APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), + Op->getConstantOperandVal(3)); + return DAG.getNode(ISD::VSELECT, DL, VecTy, + DAG.getConstant(Mask, VecTy, true), Op->getOperand(1), + Op->getOperand(2)); + } + case Intrinsic::mips_bmnz_v: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), + Op->getOperand(2), Op->getOperand(1)); + case Intrinsic::mips_bmnzi_b: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), + lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), + Op->getOperand(1)); + case Intrinsic::mips_bmz_v: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_bmzi_b: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), + lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_bnz_b: + case Intrinsic::mips_bnz_h: + case Intrinsic::mips_bnz_w: + case Intrinsic::mips_bnz_d: + return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_bnz_v: + return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_bsel_v: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), + Op->getOperand(3)); + case Intrinsic::mips_bseli_b: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), + lowerMSASplatImm(Op, 3, DAG)); + case Intrinsic::mips_bz_b: + case Intrinsic::mips_bz_h: + case Intrinsic::mips_bz_w: + case Intrinsic::mips_bz_d: + return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_bz_v: + return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_ceq_b: + case 
Intrinsic::mips_ceq_h: + case Intrinsic::mips_ceq_w: + case Intrinsic::mips_ceq_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETEQ); + case Intrinsic::mips_ceqi_b: + case Intrinsic::mips_ceqi_h: + case Intrinsic::mips_ceqi_w: + case Intrinsic::mips_ceqi_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ); + case Intrinsic::mips_cle_s_b: + case Intrinsic::mips_cle_s_h: + case Intrinsic::mips_cle_s_w: + case Intrinsic::mips_cle_s_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETLE); + case Intrinsic::mips_clei_s_b: + case Intrinsic::mips_clei_s_h: + case Intrinsic::mips_clei_s_w: + case Intrinsic::mips_clei_s_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETLE); + case Intrinsic::mips_cle_u_b: + case Intrinsic::mips_cle_u_h: + case Intrinsic::mips_cle_u_w: + case Intrinsic::mips_cle_u_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULE); + case Intrinsic::mips_clei_u_b: + case Intrinsic::mips_clei_u_h: + case Intrinsic::mips_clei_u_w: + case Intrinsic::mips_clei_u_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); + case Intrinsic::mips_clt_s_b: + case Intrinsic::mips_clt_s_h: + case Intrinsic::mips_clt_s_w: + case Intrinsic::mips_clt_s_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETLT); + case Intrinsic::mips_clti_s_b: + case Intrinsic::mips_clti_s_h: + case Intrinsic::mips_clti_s_w: + case Intrinsic::mips_clti_s_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETLT); + case Intrinsic::mips_clt_u_b: + case Intrinsic::mips_clt_u_h: + case Intrinsic::mips_clt_u_w: + case Intrinsic::mips_clt_u_d: + return DAG.getSetCC(DL, 
Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULT); + case Intrinsic::mips_clti_u_b: + case Intrinsic::mips_clti_u_h: + case Intrinsic::mips_clti_u_w: + case Intrinsic::mips_clti_u_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); + case Intrinsic::mips_copy_s_b: + case Intrinsic::mips_copy_s_h: + case Intrinsic::mips_copy_s_w: + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); + case Intrinsic::mips_copy_s_d: + // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal. + // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type + // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_copy_u_b: + case Intrinsic::mips_copy_u_h: + case Intrinsic::mips_copy_u_w: + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); + case Intrinsic::mips_copy_u_d: + // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal. + // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type + // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. + // + // Note: When i64 is illegal, this results in copy_s.w instructions instead + // of copy_u.w instructions. This makes no difference to the behaviour + // since i64 is only illegal when the register file is 32-bit. 
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_div_s_b: + case Intrinsic::mips_div_s_h: + case Intrinsic::mips_div_s_w: + case Intrinsic::mips_div_s_d: + return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_div_u_b: + case Intrinsic::mips_div_u_h: + case Intrinsic::mips_div_u_w: + case Intrinsic::mips_div_u_d: + return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_fadd_w: + case Intrinsic::mips_fadd_d: + return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away + case Intrinsic::mips_fceq_w: + case Intrinsic::mips_fceq_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETOEQ); + case Intrinsic::mips_fcle_w: + case Intrinsic::mips_fcle_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETOLE); + case Intrinsic::mips_fclt_w: + case Intrinsic::mips_fclt_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETOLT); + case Intrinsic::mips_fcne_w: + case Intrinsic::mips_fcne_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETONE); + case Intrinsic::mips_fcor_w: + case Intrinsic::mips_fcor_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETO); + case Intrinsic::mips_fcueq_w: + case Intrinsic::mips_fcueq_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETUEQ); + case Intrinsic::mips_fcule_w: + case Intrinsic::mips_fcule_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULE); + case Intrinsic::mips_fcult_w: + case 
Intrinsic::mips_fcult_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETULT); + case Intrinsic::mips_fcun_w: + case Intrinsic::mips_fcun_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETUO); + case Intrinsic::mips_fcune_w: + case Intrinsic::mips_fcune_d: + return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2), ISD::SETUNE); + case Intrinsic::mips_fdiv_w: + case Intrinsic::mips_fdiv_d: + return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_ffint_u_w: + case Intrinsic::mips_ffint_u_d: + return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_ffint_s_w: + case Intrinsic::mips_ffint_s_d: + return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_fill_b: + case Intrinsic::mips_fill_h: + case Intrinsic::mips_fill_w: + case Intrinsic::mips_fill_d: { + SmallVector Ops; + EVT ResTy = Op->getValueType(0); + + for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) + Ops.push_back(Op->getOperand(1)); + + // If ResTy is v2i64 then the type legalizer will break this node down into + // an equivalent v4i32. 
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size()); + } + case Intrinsic::mips_fexp2_w: + case Intrinsic::mips_fexp2_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode( + ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); + } + case Intrinsic::mips_flog2_w: + case Intrinsic::mips_flog2_d: + return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::mips_fmadd_w: + case Intrinsic::mips_fmadd_d: + return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::mips_fmul_w: + case Intrinsic::mips_fmul_d: + return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_fmsub_w: + case Intrinsic::mips_fmsub_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::mips_frint_w: + case Intrinsic::mips_frint_d: + return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::mips_fsqrt_w: + case Intrinsic::mips_fsqrt_d: + return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::mips_fsub_w: + case Intrinsic::mips_fsub_d: + return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_ftrunc_u_w: + case Intrinsic::mips_ftrunc_u_d: + return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_ftrunc_s_w: + case Intrinsic::mips_ftrunc_s_d: + return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::mips_ilvev_b: + case Intrinsic::mips_ilvev_h: + case Intrinsic::mips_ilvev_w: + case Intrinsic::mips_ilvev_d: + return DAG.getNode(MipsISD::ILVEV, DL, 
Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvl_b: + case Intrinsic::mips_ilvl_h: + case Intrinsic::mips_ilvl_w: + case Intrinsic::mips_ilvl_d: + return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvod_b: + case Intrinsic::mips_ilvod_h: + case Intrinsic::mips_ilvod_w: + case Intrinsic::mips_ilvod_d: + return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvr_b: + case Intrinsic::mips_ilvr_h: + case Intrinsic::mips_ilvr_w: + case Intrinsic::mips_ilvr_d: + return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_insert_b: + case Intrinsic::mips_insert_h: + case Intrinsic::mips_insert_w: + case Intrinsic::mips_insert_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); + case Intrinsic::mips_ldi_b: + case Intrinsic::mips_ldi_h: + case Intrinsic::mips_ldi_w: + case Intrinsic::mips_ldi_d: + return lowerMSASplatImm(Op, 1, DAG); + case Intrinsic::mips_lsa: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::mips_maddv_b: + case Intrinsic::mips_maddv_h: + case Intrinsic::mips_maddv_w: + case Intrinsic::mips_maddv_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::mips_max_s_b: + case Intrinsic::mips_max_s_h: + case Intrinsic::mips_max_s_w: + case Intrinsic::mips_max_s_d: + return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_max_u_b: + case 
Intrinsic::mips_max_u_h: + case Intrinsic::mips_max_u_w: + case Intrinsic::mips_max_u_d: + return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_maxi_s_b: + case Intrinsic::mips_maxi_s_h: + case Intrinsic::mips_maxi_s_w: + case Intrinsic::mips_maxi_s_d: + return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_maxi_u_b: + case Intrinsic::mips_maxi_u_h: + case Intrinsic::mips_maxi_u_w: + case Intrinsic::mips_maxi_u_d: + return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_min_s_b: + case Intrinsic::mips_min_s_h: + case Intrinsic::mips_min_s_w: + case Intrinsic::mips_min_s_d: + return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_min_u_b: + case Intrinsic::mips_min_u_h: + case Intrinsic::mips_min_u_w: + case Intrinsic::mips_min_u_d: + return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_mini_s_b: + case Intrinsic::mips_mini_s_h: + case Intrinsic::mips_mini_s_w: + case Intrinsic::mips_mini_s_d: + return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_mini_u_b: + case Intrinsic::mips_mini_u_h: + case Intrinsic::mips_mini_u_w: + case Intrinsic::mips_mini_u_d: + return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_mod_s_b: + case Intrinsic::mips_mod_s_h: + case Intrinsic::mips_mod_s_w: + case Intrinsic::mips_mod_s_d: + return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_mod_u_b: + case Intrinsic::mips_mod_u_h: + case Intrinsic::mips_mod_u_w: + case Intrinsic::mips_mod_u_d: + 
return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_mulv_b: + case Intrinsic::mips_mulv_h: + case Intrinsic::mips_mulv_w: + case Intrinsic::mips_mulv_d: + return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_msubv_b: + case Intrinsic::mips_msubv_h: + case Intrinsic::mips_msubv_w: + case Intrinsic::mips_msubv_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::mips_nlzc_b: + case Intrinsic::mips_nlzc_h: + case Intrinsic::mips_nlzc_w: + case Intrinsic::mips_nlzc_d: + return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::mips_nor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::mips_nori_b: { + SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), + lowerMSASplatImm(Op, 2, DAG)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::mips_or_v: + return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_ori_b: + return DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_pckev_b: + case Intrinsic::mips_pckev_h: + case Intrinsic::mips_pckev_w: + case Intrinsic::mips_pckev_d: + return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_pckod_b: + case Intrinsic::mips_pckod_h: + case Intrinsic::mips_pckod_w: + case Intrinsic::mips_pckod_d: + return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_pcnt_b: + case 
Intrinsic::mips_pcnt_h: + case Intrinsic::mips_pcnt_w: + case Intrinsic::mips_pcnt_d: + return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::mips_shf_b: + case Intrinsic::mips_shf_h: + case Intrinsic::mips_shf_w: + return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), + Op->getOperand(2), Op->getOperand(1)); + case Intrinsic::mips_sll_b: + case Intrinsic::mips_sll_h: + case Intrinsic::mips_sll_w: + case Intrinsic::mips_sll_d: + return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_slli_b: + case Intrinsic::mips_slli_h: + case Intrinsic::mips_slli_w: + case Intrinsic::mips_slli_d: + return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_splat_b: + case Intrinsic::mips_splat_h: + case Intrinsic::mips_splat_w: + case Intrinsic::mips_splat_d: + // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle + // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because + // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. + // Instead we lower to MipsISD::VSHF and match from there. 
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), + lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), + Op->getOperand(1)); + case Intrinsic::mips_splati_b: + case Intrinsic::mips_splati_h: + case Intrinsic::mips_splati_w: + case Intrinsic::mips_splati_d: + return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), + lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), + Op->getOperand(1)); + case Intrinsic::mips_sra_b: + case Intrinsic::mips_sra_h: + case Intrinsic::mips_sra_w: + case Intrinsic::mips_sra_d: + return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_srai_b: + case Intrinsic::mips_srai_h: + case Intrinsic::mips_srai_w: + case Intrinsic::mips_srai_d: + return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_srl_b: + case Intrinsic::mips_srl_h: + case Intrinsic::mips_srl_w: + case Intrinsic::mips_srl_d: + return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_srli_b: + case Intrinsic::mips_srli_h: + case Intrinsic::mips_srli_w: + case Intrinsic::mips_srli_d: + return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_subv_b: + case Intrinsic::mips_subv_h: + case Intrinsic::mips_subv_w: + case Intrinsic::mips_subv_d: + return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_subvi_b: + case Intrinsic::mips_subvi_h: + case Intrinsic::mips_subvi_w: + case Intrinsic::mips_subvi_d: + return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_vshf_b: + case Intrinsic::mips_vshf_h: + case Intrinsic::mips_vshf_w: + case Intrinsic::mips_vshf_d: + return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 
+ case Intrinsic::mips_xor_v: + return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_xori_b: + return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), + Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); } } +static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false, + false, false, 16); +} + SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { - switch (cast(Op->getOperand(1))->getZExtValue()) { + unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); + switch (Intr) { default: return SDValue(); case Intrinsic::mips_extp: @@ -771,7 +1929,524 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); case Intrinsic::mips_dpsqx_sa_w_ph: return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); + case Intrinsic::mips_ld_b: + case Intrinsic::mips_ld_h: + case Intrinsic::mips_ld_w: + case Intrinsic::mips_ld_d: + return lowerMSALoadIntr(Op, DAG, Intr); + } +} + +static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false, + false, 16); +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + unsigned Intr = 
cast(Op->getOperand(1))->getZExtValue(); + switch (Intr) { + default: + return SDValue(); + case Intrinsic::mips_st_b: + case Intrinsic::mips_st_h: + case Intrinsic::mips_st_w: + case Intrinsic::mips_st_d: + return lowerMSAStoreIntr(Op, DAG, Intr); + } +} + +/// \brief Check if the given BuildVectorSDNode is a splat. +/// This method currently relies on DAG nodes being reused when equivalent, +/// so it's possible for this to return false even when isConstantSplat returns +/// true. +static bool isSplatVector(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); + + SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 1; i < nOps; ++i) { + if (N->getOperand(i) != Operand0) + return false; + } + + return true; +} + +// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. +// +// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We +// choose to sign-extend but we could have equally chosen zero-extend. The +// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT +// result into this node later (possibly changing it to a zero-extend in the +// process). 
+SDValue MipsSETargetLowering::
+lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT ResTy = Op->getValueType(0);
+  SDValue Op0 = Op->getOperand(0);
+  EVT VecTy = Op0->getValueType(0);
+
+  // Only 128-bit source vectors are handled here; anything else yields an
+  // empty SDValue.
+  if (!VecTy.is128BitVector())
+    return SDValue();
+
+  // Integer results become VEXTRACT_SEXT_ELT, with the vector's element type
+  // passed along as an extra value-type operand.
+  if (ResTy.isInteger()) {
+    SDValue Op1 = Op->getOperand(1);
+    EVT EltTy = VecTy.getVectorElementType();
+    return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
+                       DAG.getValueType(EltTy));
+  }
+
+  // Non-integer results are returned unchanged.
+  return Op;
+}
+
+// Returns true if Op is an ISD::UNDEF node, a ConstantSDNode, or a
+// ConstantFPSDNode.
+static bool isConstantOrUndef(const SDValue Op) {
+  if (Op->getOpcode() == ISD::UNDEF)
+    return true;
+  if (dyn_cast<ConstantSDNode>(Op))
+    return true;
+  if (dyn_cast<ConstantFPSDNode>(Op))
+    return true;
+  return false;
+}
+
+// Returns true if at least one operand of Op satisfies isConstantOrUndef().
+// Note that a single such operand is enough: the scan stops at the first
+// match, so this is an "any operand" test rather than an "all operands" test.
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+    if (isConstantOrUndef(Op->getOperand(i)))
+      return true;
+  return false;
+}
+
+// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
+// backend.
+//
+// Lowers according to the following rules:
+// - Constant splats are legal as-is as long as the SplatBitSize is a power of
+//   2 less than or equal to 64 and the value fits into a signed 10-bit
+//   immediate
+// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
+//   is a power of 2 less than or equal to 64 and the value does not fit into a
+//   signed 10-bit immediate
+// - Non-constant splats are legal as-is.
+// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
+// - All others are illegal and must be expanded.
+SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+  EVT ResTy = Op->getValueType(0);
+  SDLoc DL(Op);
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  // Bail out (empty SDValue) unless MSA is available and the result is a
+  // 128-bit vector.
+  if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
+    return SDValue();
+
+  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                            HasAnyUndefs, 8,
+                            !Subtarget->isLittle()) && SplatBitSize <= 64) {
+    // We can only cope with 8, 16, 32, or 64-bit elements
+    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+        SplatBitSize != 64)
+      return SDValue();
+
+    // If the value fits into a simm10 then we can use ldi.[bhwd]
+    if (SplatValue.isSignedIntN(10))
+      return Op;
+
+    EVT ViaVecTy;
+
+    switch (SplatBitSize) {
+    default:
+      return SDValue();
+    case 8:
+      ViaVecTy = MVT::v16i8;
+      break;
+    case 16:
+      ViaVecTy = MVT::v8i16;
+      break;
+    case 32:
+      ViaVecTy = MVT::v4i32;
+      break;
+    case 64:
+      // There's no fill.d to fall back on for 64-bit values
+      return SDValue();
+    }
+
+    // Rebuild the splat as a BUILD_VECTOR of identical i32 constants in
+    // ViaVecTy, then bitcast back to ResTy if the two types differ.
+    SmallVector<SDValue, 16> Ops;
+    SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32);
+
+    for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i)
+      Ops.push_back(Constant);
+
+    SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy,
+                                 &Ops[0], Ops.size());
+
+    if (ViaVecTy != ResTy)
+      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+    return Result;
+  } else if (isSplatVector(Node))
+    return Op;
+  else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+    // The resulting code is the same length as the expansion, but it doesn't
+    // use memory operations
+    //
+    // Note: this ResTy shadows the outer one; both are value type 0 of the
+    // same node.
+    EVT ResTy = Node->getValueType(0);
+
+    assert(ResTy.isVector());
+
+    unsigned NumElts = ResTy.getVectorNumElements();
+    SDValue Vector = DAG.getUNDEF(ResTy);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+                           Node->getOperand(i),
+                           DAG.getConstant(i, MVT::i32));
+    }
+    return Vector;
+  }
+
+  return SDValue();
+}
+
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+// When undef's appear they are treated as if they were whatever value is
+// necessary in order to fit the above form.
+//
+// For example:
+//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+//                                 i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+//   (SHF_H $w0, $w1, 27)
+// where the 27 comes from:
+//   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  int SHFIndices[4] = { -1, -1, -1, -1 };
+
+  if (Indices.size() < 4)
+    return SDValue();
+
+  // For each of the four positions within a block, visit that position in
+  // every block (stride 4) and require a consistent block-relative index.
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Indices.size(); j += 4) {
+      int Idx = Indices[j];
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
+                     DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
+}
+
+// Lower VECTOR_SHUFFLE into ILVEV (if possible).
+//
+// ILVEV interleaves the even elements from each vector.
+//
+// It is possible to lower into ILVEV when the mask takes the form:
+//   <0, n, 2, n+2, 4, n+4, ...>
+// where n is the number of elements in the vector.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert ((Indices.size() % 2) == 0);
+  int WsIdx = 0;
+  int WtIdx = ResTy.getVectorNumElements();
+
+  // Walk the mask in pairs; -1 (undef) entries match any expected index.
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx += 2;
+    WtIdx += 2;
+  }
+
+  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVOD (if possible).
+//
+// ILVOD interleaves the odd elements from each vector.
+//
+// It is possible to lower into ILVOD when the mask takes the form:
+//   <1, n+1, 3, n+3, 5, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert ((Indices.size() % 2) == 0);
+  int WsIdx = 1;
+  int WtIdx = ResTy.getVectorNumElements() + 1;
+
+  // Walk the mask in pairs; -1 (undef) entries match any expected index.
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx += 2;
+    WtIdx += 2;
+  }
+
+  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVL (if possible).
+//
+// ILVL interleaves consecutive elements from the left half of each vector.
+//
+// It is possible to lower into ILVL when the mask takes the form:
+//   <0, n, 1, n+1, 2, n+2, ...>
+// where n is the number of elements in the vector.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  assert ((Indices.size() % 2) == 0);
+  int WsIdx = 0;
+  int WtIdx = ResTy.getVectorNumElements();
+
+  // Walk the mask in pairs; -1 (undef) entries match any expected index.
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx ++;
+    WtIdx ++;
+  }
+
+  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVR (if possible).
+//
+// ILVR interleaves consecutive elements from the right half of each vector.
+//
+// It is possible to lower into ILVR when the mask takes the form:
+//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  assert ((Indices.size() % 2) == 0);
+  unsigned NumElts = ResTy.getVectorNumElements();
+  // Expected indices start at the midpoint of the first operand (NumElts / 2)
+  // and at the midpoint of the second operand (NumElts + NumElts / 2).
+  int WsIdx = NumElts / 2;
+  int WtIdx = NumElts + NumElts / 2;
+
+  // Walk the mask in pairs; -1 (undef) entries match any expected index.
+  for (unsigned i = 0; i < Indices.size(); i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+      return SDValue();
+    WsIdx ++;
+    WtIdx ++;
+  }
+
+  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into PCKEV (if possible).
+//
+// PCKEV copies the even elements of each vector into the result vector.
+//
+// It is possible to lower into PCKEV when the mask takes the form:
+//   <0, 2, 4, ..., n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert ((Indices.size() % 2) == 0);
+  int Idx = 0;
+
+  // Every mask entry must be undef (-1) or the next even index in sequence.
+  for (unsigned i = 0; i < Indices.size(); ++i) {
+    if (Indices[i] != -1 && Indices[i] != Idx)
+      return SDValue();
+    Idx += 2;
+  }
+
+  return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into PCKOD (if possible).
+//
+// PCKOD copies the odd elements of each vector into the result vector.
+//
+// It is possible to lower into PCKOD when the mask takes the form:
+//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + assert ((Indices.size() % 2) == 0); + int Idx = 1; + + for (unsigned i = 0; i < Indices.size(); ++i) { + if (Indices[i] != -1 && Indices[i] != Idx) + return SDValue(); + Idx += 2; + } + + return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0), + Op->getOperand(1)); +} + +// Lower VECTOR_SHUFFLE into VSHF. +// +// This mostly consists of converting the shuffle indices in Indices into a +// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is +// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, +// if the type is v8i16 and all the indices are less than 8 then the second +// operand is unused and can be replaced with anything. We choose to replace it +// with the used operand since this reduces the number of instructions overall. +static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + SmallVector Ops; + SDValue Op0; + SDValue Op1; + EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); + EVT MaskEltTy = MaskVecTy.getVectorElementType(); + bool Using1stVec = false; + bool Using2ndVec = false; + SDLoc DL(Op); + int ResTyNumElts = ResTy.getVectorNumElements(); + + for (int i = 0; i < ResTyNumElts; ++i) { + // Idx == -1 means UNDEF + int Idx = Indices[i]; + + if (0 <= Idx && Idx < ResTyNumElts) + Using1stVec = true; + if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) + Using2ndVec = true; } + + for (SmallVector::iterator I = Indices.begin(); I != Indices.end(); + ++I) + Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); + + SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0], + Ops.size()); + + if (Using1stVec && Using2ndVec) { + Op0 = Op->getOperand(0); + Op1 = Op->getOperand(1); + } else if (Using1stVec) + Op0 = Op1 = Op->getOperand(0); + else if (Using2ndVec) + Op0 = Op1 = Op->getOperand(1); + else + 
llvm_unreachable("shuffle vector mask references neither vector operand?"); + + return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1); +} + +// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the +// indices in the shuffle. +SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + ShuffleVectorSDNode *Node = cast(Op); + EVT ResTy = Op->getValueType(0); + + if (!ResTy.is128BitVector()) + return SDValue(); + + int ResTyNumElts = ResTy.getVectorNumElements(); + SmallVector Indices; + + for (int i = 0; i < ResTyNumElts; ++i) + Indices.push_back(Node->getMaskElt(i)); + + SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); } MachineBasicBlock * MipsSETargetLowering:: @@ -836,3 +2511,318 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ MI->eraseFromParent(); // The pseudo instruction is gone now. 
return Sink; } + +MachineBasicBlock * MipsSETargetLowering:: +emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, + unsigned BranchOp) const{ + // $bb: + // vany_nonzero $rd, $ws + // => + // $bb: + // bnz.b $ws, $tbb + // b $fbb + // $fbb: + // li $rd1, 0 + // b $sink + // $tbb: + // li $rd2, 1 + // $sink: + // $rd = phi($rd1, $fbb, $rd2, $tbb) + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::GPR32RegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bnz.b instruction to $BB. + BuildMI(BB, DL, TII->get(BranchOp)) + .addReg(MI->getOperand(1).getReg()) + .addMBB(TBB); + + // Fill $FBB. + unsigned RD1 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned RD2 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. 
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} + +// Emit the COPY_FW pseudo instruction. +// +// copy_fw_pseudo $fd, $ws, n +// => +// copy_u_w $rt, $ws, $n +// mtc1 $rt, $fd +// +// When n is zero, the equivalent operation can be performed with (potentially) +// zero instructions due to register overlaps. This optimization is never valid +// for lane 1 because it would require FR=0 mode which isn't supported by MSA. +MachineBasicBlock * MipsSETargetLowering:: +emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Fd = MI->getOperand(0).getReg(); + unsigned Ws = MI->getOperand(1).getReg(); + unsigned Lane = MI->getOperand(2).getImm(); + + if (Lane == 0) + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo); + else { + unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); + } + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the COPY_FD pseudo instruction. +// +// copy_fd_pseudo $fd, $ws, n +// => +// splati.d $wt, $ws, $n +// copy $fd, $wt:sub_64 +// +// When n is zero, the equivalent operation can be performed with (potentially) +// zero instructions due to register overlaps. This optimization is always +// valid because FR=1 mode which is the only supported mode in MSA. 
+MachineBasicBlock * MipsSETargetLowering:: +emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{ + assert(Subtarget->isFP64bit()); + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + unsigned Fd = MI->getOperand(0).getReg(); + unsigned Ws = MI->getOperand(1).getReg(); + unsigned Lane = MI->getOperand(2).getImm() * 2; + DebugLoc DL = MI->getDebugLoc(); + + if (Lane == 0) + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); + else { + unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); + } + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_FW pseudo instruction. +// +// insert_fw_pseudo $wd, $wd_in, $n, $fs +// => +// subreg_to_reg $wt:sub_lo, $fs +// insve_w $wd[$n], $wd_in, $wt[0] +MachineBasicBlock * +MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Wd = MI->getOperand(0).getReg(); + unsigned Wd_in = MI->getOperand(1).getReg(); + unsigned Lane = MI->getOperand(2).getImm(); + unsigned Fs = MI->getOperand(3).getReg(); + unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) + .addImm(0) + .addReg(Fs) + .addImm(Mips::sub_lo); + BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) + .addReg(Wd_in) + .addImm(Lane) + .addReg(Wt); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_FD pseudo instruction. 
+// +// insert_fd_pseudo $wd, $fs, n +// => +// subreg_to_reg $wt:sub_64, $fs +// insve_d $wd[$n], $wd_in, $wt[0] +MachineBasicBlock * +MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, + MachineBasicBlock *BB) const { + assert(Subtarget->isFP64bit()); + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Wd = MI->getOperand(0).getReg(); + unsigned Wd_in = MI->getOperand(1).getReg(); + unsigned Lane = MI->getOperand(2).getImm(); + unsigned Fs = MI->getOperand(3).getReg(); + unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) + .addImm(0) + .addReg(Fs) + .addImm(Mips::sub_64); + BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) + .addReg(Wd_in) + .addImm(Lane) + .addReg(Wt); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FILL_FW pseudo instruction. +// +// fill_fw_pseudo $wd, $fs +// => +// implicit_def $wt1 +// insert_subreg $wt2:subreg_lo, $wt1, $fs +// splati.w $wd, $wt2[0] +MachineBasicBlock * +MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Wd = MI->getOperand(0).getReg(); + unsigned Fs = MI->getOperand(1).getReg(); + unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); + BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) + .addReg(Wt1) + .addReg(Fs) + .addImm(Mips::sub_lo); + BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} + +// Emit the FILL_FD pseudo instruction. +// +// fill_fd_pseudo $wd, $fs +// => +// implicit_def $wt1 +// insert_subreg $wt2:subreg_64, $wt1, $fs +// splati.d $wd, $wt2[0] +MachineBasicBlock * +MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, + MachineBasicBlock *BB) const { + assert(Subtarget->isFP64bit()); + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Wd = MI->getOperand(0).getReg(); + unsigned Fs = MI->getOperand(1).getReg(); + unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + + BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); + BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) + .addReg(Wt1) + .addReg(Fs) + .addImm(Mips::sub_64); + BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FEXP2_W_1 pseudo instructions. 
+// +// fexp2_w_1_pseudo $wd, $wt +// => +// ldi.w $ws, 1 +// fexp2.w $wd, $ws, $wt +MachineBasicBlock * +MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetRegisterClass *RC = &Mips::MSA128WRegClass; + unsigned Ws1 = RegInfo.createVirtualRegister(RC); + unsigned Ws2 = RegInfo.createVirtualRegister(RC); + DebugLoc DL = MI->getDebugLoc(); + + // Splat 1.0 into a vector + BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); + + // Emit 1.0 * fexp2(Wt) + BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg()) + .addReg(Ws2) + .addReg(MI->getOperand(1).getReg()); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FEXP2_D_1 pseudo instructions. +// +// fexp2_d_1_pseudo $wd, $wt +// => +// ldi.d $ws, 1 +// fexp2.d $wd, $ws, $wt +MachineBasicBlock * +MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetRegisterClass *RC = &Mips::MSA128DRegClass; + unsigned Ws1 = RegInfo.createVirtualRegister(RC); + unsigned Ws2 = RegInfo.createVirtualRegister(RC); + DebugLoc DL = MI->getDebugLoc(); + + // Splat 1.0 into a vector + BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); + + // Emit 1.0 * fexp2(Wt) + BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg()) + .addReg(Ws2) + .addReg(MI->getOperand(1).getReg()); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +}