+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ assert(!isVarArg && "Don't support passing vectors to varargs yet!");
+ assert(VR_idx != NumVRs &&
+ "Don't support passing more than 12 vector args yet!");
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ break;
+ }
+ }
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<MVT::ValueType> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+
+ std::vector<SDOperand> Ops;
+ unsigned CallOpc = PPCISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
+ else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDOperand(Dest, 0);
+ else {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ // Copy the callee address into R12 on darwin.
+ Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ CallOpc = PPCISD::BCTRL;
+ Callee.Val = 0;
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.Val) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(CallOpc, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDOperand> ResultVals;
+ NodeTys.clear();
+
+ // If the call has results, copy the values out of the ret val registers.
+ switch (Op.Val->getValueType(0)) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i32:
+ if (Op.Val->getValueType(1) == MVT::i32) {
+ Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,
+ Chain.getValue(2)).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i32);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+ NodeTys.push_back(MVT::i32);
+ break;
+ case MVT::i64:
+ Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i64);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(Op.Val->getValueType(0));
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(Op.Val->getValueType(0));
+ break;
+ }
+
+ Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
+ DAG.getConstant(NumBytes, PtrVT));
+ NodeTys.push_back(MVT::Other);
+
+ // If the function returns void, just return the chain.
+ if (ResultVals.empty())
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);
+ return Res.getValue(Op.ResNo);
+}
+
+static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Copy;
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ return SDOperand(); // ret void is legal
+ case 3: {
+ MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg;
+ if (ArgVT == MVT::i32) {
+ ArgReg = PPC::R3;
+ } else if (ArgVT == MVT::i64) {
+ ArgReg = PPC::X3;
+ } else if (MVT::isFloatingPoint(ArgVT)) {
+ ArgReg = PPC::F1;
+ } else {
+ assert(MVT::isVector(ArgVT));
+ ArgReg = PPC::V2;
+ }
+
+ Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
+ SDOperand());
+
+ // If we haven't noted the R3/F1 are live out, do so now.
+ if (DAG.getMachineFunction().liveout_empty())
+ DAG.getMachineFunction().addLiveOut(ArgReg);
+ break;
+ }
+ case 5:
+ Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(3),
+ SDOperand());
+ Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
+ // If we haven't noted the R3+R4 are live out, do so now.
+ if (DAG.getMachineFunction().liveout_empty()) {
+ DAG.getMachineFunction().addLiveOut(PPC::R3);
+ DAG.getMachineFunction().addLiveOut(PPC::R4);
+ }
+ break;
+ }
+ return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
+/// possible.
+static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
+ // Not FP? Not a fsel.
+ if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
+ !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
+ return SDOperand();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();
+
+ MVT::ValueType ResVT = Op.getValueType();
+ MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
+ SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETULE:
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, ResVT,
+ DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
+ }
+
+ SDOperand Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
+ case ISD::SETULE:
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
+ }
+ return SDOperand();
+}
+
+static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
+ SDOperand Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
+
+ SDOperand Tmp;
+ switch (Op.getValueType()) {
+ default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
+ if (Op.getValueType() == MVT::i32)
+ Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
+ return Bits;
+}
+
+static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
+ SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
+ // then lfd it and fcfid it.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
+ DAG.getEntryNode(), Ext64, FIdx,
+ DAG.getSrcValue(NULL));
+ // Load the value as a double.
+ SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
+
+ // FCFID it and return it.
+ SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ return FP;
+}
+
+static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG,
+ MVT::ValueType PtrVT) {
+ assert(Op.getValueType() == MVT::i64 &&
+ Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
+ // The generic code does a fine job expanding shift by a constant.
+ if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
+
+ // Otherwise, expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, PtrVT));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, PtrVT));
+ SDOperand Amt = Op.getOperand(1);
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+ DAG.getConstant(32, MVT::i32), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+ DAG.getConstant(-32U, MVT::i32));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
+ SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
+ SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
+}
+
+static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG,
+ MVT::ValueType PtrVT) {
+ assert(Op.getValueType() == MVT::i64 &&
+ Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
+ // The generic code does a fine job expanding shift by a constant.
+ if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
+
+ // Otherwise, expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, PtrVT));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, PtrVT));
+ SDOperand Amt = Op.getOperand(1);
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+ DAG.getConstant(32, MVT::i32), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+ DAG.getConstant(-32U, MVT::i32));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
+ SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
+ SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
+}
+
+static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG,
+ MVT::ValueType PtrVT) {
+ assert(Op.getValueType() == MVT::i64 &&
+ Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
+ // The generic code does a fine job expanding shift by a constant.
+ if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
+
+ // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, PtrVT));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, PtrVT));
+ SDOperand Amt = Op.getOperand(1);
+
+ SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
+ DAG.getConstant(32, MVT::i32), Amt);
+ SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
+ SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
+ SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
+ SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
+ DAG.getConstant(-32U, MVT::i32));
+ SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
+ SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
+ SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
+ Tmp4, Tmp6, ISD::SETLE);
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+// If this is a vector of constants or undefs, get the bits. A bit in
+// UndefBits is set if the corresponding element of the vector is an
+// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
+// zero. Return true if this is not an array of constants, false if it is.
+//
+static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
+ uint64_t UndefBits[2]) {
+ // Start with zero'd results.
+ VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
+
+ unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDOperand OpVal = BV->getOperand(i);
+
+ unsigned PartNo = i >= e/2; // In the upper 128 bits?
+ unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
+
+ uint64_t EltBits = 0;
+ if (OpVal.getOpcode() == ISD::UNDEF) {
+ uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
+ UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
+ continue;
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 &&
+ "Only one legal FP vector type!");
+ EltBits = FloatToBits(CN->getValue());
+ } else {
+ // Nonconstant element.
+ return true;
+ }
+
+ VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
+ }
+
+ //printf("%llx %llx %llx %llx\n",
+ // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
+ return false;
+}
+
+// If this is a splat (repetition) of a value across the whole vector, return
+// the smallest size that splats it. For example, "0x01010101010101..." is a
+// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+// SplatSize = 1 byte.
+static bool isConstantSplat(const uint64_t Bits128[2],
+ const uint64_t Undef128[2],
+ unsigned &SplatBits, unsigned &SplatUndef,
+ unsigned &SplatSize) {
+
+ // Don't let undefs prevent splats from matching. See if the top 64-bits are
+ // the same as the lower 64-bits, ignoring undefs.
+ if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
+ return false; // Can't be a splat if two pieces don't match.
+
+ uint64_t Bits64 = Bits128[0] | Bits128[1];
+ uint64_t Undef64 = Undef128[0] & Undef128[1];
+
+ // Check that the top 32-bits are the same as the lower 32-bits, ignoring
+ // undefs.
+ if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
+ return false; // Can't be a splat if two pieces don't match.
+
+ uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
+ uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
+
+ // If the top 16-bits are different than the lower 16-bits, ignoring
+ // undefs, we have an i32 splat.
+ if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
+ SplatBits = Bits32;
+ SplatUndef = Undef32;
+ SplatSize = 4;
+ return true;
+ }
+
+ uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
+ uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
+
+ // If the top 8-bits are different than the lower 8-bits, ignoring
+ // undefs, we have an i16 splat.
+ if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
+ SplatBits = Bits16;
+ SplatUndef = Undef16;
+ SplatSize = 2;
+ return true;
+ }
+
+ // Otherwise, we have an 8-bit splat.
+ SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
+ SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
+ SplatSize = 1;
+ return true;
+}
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
+ SelectionDAG &DAG) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ // Force vspltis[hw] -1 to vspltisb -1.
+ if (Val == -1) SplatSize = 1;
+
+ static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+ MVT::ValueType CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
+ std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
+ SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
+ SelectionDAG &DAG,
+ MVT::ValueType DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
+ SDOperand Op2, SelectionDAG &DAG,
+ MVT::ValueType DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
+ MVT::ValueType VT, SelectionDAG &DAG) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
+
+ std::vector<SDOperand> Ops;
+ for (unsigned i = 0; i != 16; ++i)
+ Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
+ SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
+ return DAG.getNode(ISD::BIT_CONVERT, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+ // If this is a vector of constants or undefs, get the bits. A bit in
+ // UndefBits is set if the corresponding element of the vector is an
+ // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
+ // zero.
+ uint64_t VectorBits[2];
+ uint64_t UndefBits[2];
+ if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
+ return SDOperand(); // Not a constant vector.
+
+ // If this is a splat (repetition) of a value across the whole vector, return
+ // the smallest size that splats it. For example, "0x01010101010101..." is a
+ // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+ // SplatSize = 1 byte.
+ unsigned SplatBits, SplatUndef, SplatSize;
+ if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
+ bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDOperand Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
+ return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
+ }
+
+ // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
+ // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+ // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ unsigned SplatBitSize = SplatSize*8;
+ static const char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+ for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
+ SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
+ return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
+ }
+ // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
+ SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
+ return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
+ }
+ }
+
+ return SDOperand();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
+ SDOperand RHS, SelectionDAG &DAG) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDOperand OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);
+
+ unsigned ShufIdxs[16];
+ switch (OpNum) {
+ default: assert(0 && "Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
+ }
+ std::vector<SDOperand> Ops;
+ for (unsigned i = 0; i != 16; ++i)
+ Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));
+
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand V1 = Op.getOperand(0);
+ SDOperand V2 = Op.getOperand(1);
+ SDOperand PermMask = Op.getOperand(2);
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
+ PPC::isSplatShuffleMask(PermMask.Val, 2) ||
+ PPC::isSplatShuffleMask(PermMask.Val, 4) ||
+ PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
+ PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
+ PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
+ PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
+ PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
+ PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
+ PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource =
+ cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;