const APFloat& Val) {
assert(MVT::isFloatingPoint(VT) && "Can only convert between FP types");
- // Anything can be extended to ppc long double.
- if (VT == MVT::ppcf128)
- return true;
-
- // PPC long double cannot be shrunk to anything though.
- if (&Val.getSemantics() == &APFloat::PPCDoubleDouble)
+ // PPC long double cannot be converted to or from any other type.
+ if (VT == MVT::ppcf128 ||
+ &Val.getSemantics() == &APFloat::PPCDoubleDouble)
return false;
// APFloat::convert modifies in place, so make a copy.
switch (Result) {
default: break;
case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
/// solely with their pointer.
-void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
ID.AddPointer(VTList.VTs);
}
static void AddNodeIDNode(FoldingSetNodeID &ID,
unsigned short OpC, SDVTList VTList,
- const SDOperand *OpList, unsigned N) {
+ SDOperandPtr OpList, unsigned N) {
AddNodeIDOpcode(ID, OpC);
AddNodeIDValueTypes(ID, VTList);
AddNodeIDOperands(ID, OpList, N);
KnownZero = KnownZeroOut;
return;
}
+ case ISD::MUL: {
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
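+ // E.g. a multiple of 8 (three trailing zero bits) times a multiple of 4
+ // (two trailing zero bits) is a multiple of 32, so at least five trailing
+ // bits of the product are known zero.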
+ KnownOne.clear();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
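+ // The leading estimate follows from an m-bit value times an n-bit value
+ // fitting in m+n bits: the product has at least
+ // (LHS leading zeros) + (RHS leading zeros) - BitWidth leading zeros,
+ // clamped at zero.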
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero &= Mask;
+ return;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be no greater than the denominator.
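+ // E.g. dividing by a value known to be at least 16 gives the result at
+ // least four more leading zero bits than the numerator has.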
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clear();
+ KnownZero2.clear();
+ ComputeMaskedBits(Op.getOperand(1),
+ AllOnes, KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ return;
+ }
case ISD::SELECT:
ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
return;
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth here: that would require C+1 == 0, i.e. C == -1,
+ // which contradicts isNonNegative() above.
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+ Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is in
+ // the range [0, C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ }
+ }
+ }
+ }
+ // fall through
case ISD::ADD: {
- // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
// Output low known-0 bits: the sum has at least as many trailing zero bits
// as both operands have in common. For example, 8+(X<<3) is known to have
// the low 3 bits clear.
- unsigned KnownZeroOut = std::min(KnownZero.countTrailingOnes(),
- KnownZero2.countTrailingOnes());
-
- KnownZero = APInt::getLowBitsSet(BitWidth, KnownZeroOut);
- KnownOne = APInt(BitWidth, 0);
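+ // Carries in an addition propagate only upward, so the low bits of the
+ // sum depend only on the low bits of the operands; narrow the mask to
+ // the contiguous low bits that are demanded.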
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+ ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ KnownZeroOut = std::min(KnownZeroOut,
+ KnownZero2.countTrailingOnes());
+
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
return;
}
- case ISD::SUB: {
- ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
- if (!CLHS) return;
-
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (CLHS->getAPIntValue().isNonNegative()) {
- unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
-
- // If all of the MaskV bits are known to be zero, then we know the output
- // top bits are zero, because we now know that the output is from [0-C].
- if ((KnownZero & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
- KnownOne = APInt(BitWidth, 0); // No one bits known.
- } else {
- KnownZero = KnownOne = APInt(BitWidth, 0); // Otherwise, nothing known.
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+
+ // The sign of a remainder matches the sign of the first operand, except
+ // that a zero remainder is always positive, so the dividend's sign bit
+ // only carries over when the remainder is known to be nonzero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) {
+ // Non-negative dividend, or a remainder known to be zero.
+ KnownZero2 |= ~LowBits;
+ KnownOne2 &= LowBits;
+ } else if (KnownOne2[BitWidth-1] && (KnownOne2 & LowBits) != 0) {
+ // Negative dividend with a provably nonzero remainder.
+ KnownOne2 |= ~LowBits;
+ } else {
+ // The remainder may be zero; make no claim about the sign bit.
+ KnownOne2 &= LowBits;
+ }
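+ // E.g. X srem 8 lies in (-8, 8): a known-non-negative X pins the result
+ // to [0, 8), so every bit above the low three is known zero.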
+
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
return;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
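+ // X urem 2^k keeps just the low k bits of X; e.g. X urem 8 keeps bits
+ // [2:0] and all higher bits are known zero.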
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & Mask;
+ KnownZero |= ~LowBits & Mask;
+ ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+ Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+ Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clear();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ return;
}
default:
// Allow the target to implement this method for its nodes.
assert(MVT::isInteger(VT) && "Invalid VT!");
unsigned VTBits = MVT::getSizeInBits(VT);
unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
if (Depth == 6)
return 1; // Limit search depth.
case ISD::AND:
case ISD::OR:
case ISD::XOR: // NOT is handled here.
- // Logical binary ops preserve the number of sign bits.
+ // At worst, logical binary ops preserve the smaller of the operands'
+ // sign-bit counts; the generic code below may be able to prove more.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
- return std::min(Tmp, Tmp2);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // ComputeMaskedBits, and pick whichever answer is better.
+ }
+ break;
case ISD::SELECT:
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SETCC:
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
- if (NumBits > 1) return NumBits;
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
Mask = KnownOne;
} else {
// Nothing known.
- return 1;
+ return FirstAnswer;
}
// Okay, we know that the sign bit in Mask is set. Use CLZ to determine
Mask <<= Mask.getBitWidth()-VTBits;
// Return # leading zeros. We use 'min' here in case Val was zero before
// shifting. We don't want to return '64' for an i32 value of 0.
- return std::min(VTBits, Mask.countLeadingZeros());
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
}
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
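+/// For example, with two 4-element source vectors, a shuffle mask element
+/// of 5 selects element 1 of the second source operand.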
+SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) {
+ MVT::ValueType VT = N->getValueType(0);
+ SDOperand PermMask = N->getOperand(2);
+ unsigned NumElems = PermMask.getNumOperands();
+ SDOperand V = (Idx < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ Idx %= NumElems;
+
+ if (V.getOpcode() == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ if (MVT::getVectorNumElements(V.getValueType()) != NumElems)
+ return SDOperand();
+ }
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Idx == 0) ? V.getOperand(0)
+ : getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Idx);
+ if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ SDOperand Elt = PermMask.getOperand(Idx);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ return getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+ return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Elt)->getValue());
+ }
+ return SDOperand();
+}
+
+
/// getNode - Gets or creates the specified node.
///
SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) {
/// operand.
static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
SelectionDAG &DAG) {
- MVT::ValueType CurVT = VT;
+ unsigned NumBits = MVT::isVector(VT) ?
+ MVT::getSizeInBits(MVT::getVectorElementType(VT)) : MVT::getSizeInBits(VT);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- uint64_t Val = C->getValue() & 255;
+ APInt Val = APInt(NumBits, C->getValue() & 255);
unsigned Shift = 8;
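+ // Replicate the low byte across the value by repeated doubling, e.g.
+ // 0xAB -> 0xABAB -> 0xABABABAB for a 32-bit value.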
- while (CurVT != MVT::i8) {
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
Val = (Val << Shift) | Val;
Shift <<= 1;
- CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
- }
- return DAG.getConstant(Val, VT);
- } else {
- Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
- unsigned Shift = 8;
- while (CurVT != MVT::i8) {
- Value =
- DAG.getNode(ISD::OR, VT,
- DAG.getNode(ISD::SHL, VT, Value,
- DAG.getConstant(Shift, MVT::i8)), Value);
- Shift <<= 1;
- CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
}
+ if (MVT::isInteger(VT))
+ return DAG.getConstant(Val, VT);
+ return DAG.getConstantFP(APFloat(Val), VT);
+ }
- return Value;
+ Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Value = DAG.getNode(ISD::OR, VT,
+ DAG.getNode(ISD::SHL, VT, Value,
+ DAG.getConstant(Shift, MVT::i8)), Value);
+ Shift <<= 1;
}
+
+ return Value;
}
/// getMemsetStringVal - Similar to getMemsetValue, except this is used only
/// when a memcpy is turned into a memset because the source is a constant
/// string pointer.
-static SDOperand getMemsetStringVal(MVT::ValueType VT,
- SelectionDAG &DAG,
+static SDOperand getMemsetStringVal(MVT::ValueType VT, SelectionDAG &DAG,
const TargetLowering &TLI,
std::string &Str, unsigned Offset) {
+ assert(!MVT::isVector(VT) && "Can't handle vector type here!");
+ unsigned NumBits = MVT::getSizeInBits(VT);
+ unsigned MSB = NumBits / 8;
uint64_t Val = 0;
- unsigned MSB = MVT::getSizeInBits(VT) / 8;
if (TLI.isLittleEndian())
Offset = Offset + MSB - 1;
for (unsigned i = 0; i != MSB; ++i) {
}
/// getMemBasePlusOffset - Returns base and offset node for the
+/// given base pointer and byte offset, combined into a single ADD node.
static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
SelectionDAG &DAG) {
MVT::ValueType VT = Base.getValueType();
return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
}
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
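+/// Recognizes a plain GlobalAddress, or a GlobalAddress plus a constant
+/// offset (e.g. a source such as str+4 into a constant string); the
+/// constant offset is folded into SrcOff.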
+static bool isMemSrcFromString(SDOperand Src, std::string &Str,
+ uint64_t &SrcOff) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getValue();
+ }
+ if (!G)
+ return false;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GV->isConstant()) {
+ Str = GV->getStringValue(false);
+ if (!Str.empty()) {
+ SrcOff += SrcDelta;
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
/// to replace the memset / memcpy is below the threshold. It also returns the
/// types of the sequence of memory ops to perform memset / memcpy.
-static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned Align,
- const TargetLowering &TLI) {
- MVT::ValueType VT;
-
- if (TLI.allowsUnalignedMemoryAccesses()) {
- VT = MVT::i64;
- } else {
- switch (Align & 7) {
- case 0:
- VT = MVT::i64;
- break;
- case 4:
- VT = MVT::i32;
- break;
- case 2:
- VT = MVT::i16;
- break;
- default:
- VT = MVT::i8;
- break;
+static
+bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+ SDOperand Dst, SDOperand Src,
+ unsigned Limit, uint64_t Size, unsigned &Align,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses();
+
+ std::string Str;
+ uint64_t SrcOff = 0;
+ bool isSrcStr = isMemSrcFromString(Src, Str, SrcOff);
+ bool isSrcConst = isa<ConstantSDNode>(Src);
+ MVT::ValueType VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr);
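+ // A result of MVT::iAny means the target has no preference; the code
+ // below then picks a type from the known alignment. VT may also be reset
+ // to MVT::iAny further down when the preferred type would require more
+ // destination alignment than is available.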
+ if (VT != MVT::iAny) {
+ unsigned NewAlign = (unsigned)
+ TLI.getTargetData()->getABITypeAlignment(MVT::getTypeForValueType(VT));
+ // If source is a string constant, this will require an unaligned load.
+ if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
+ if (Dst.getOpcode() != ISD::FrameIndex) {
+ // Can't change destination alignment. It requires an unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change destination alignment. It requires an unaligned store.
+ if (AllowUnalign)
+ VT = MVT::iAny;
+ } else {
+ // Give the stack frame object a larger alignment.
+ MFI->setObjectAlignment(FI, NewAlign);
+ Align = NewAlign;
+ }
+ }
}
}
- MVT::ValueType LVT = MVT::i64;
- while (!TLI.isTypeLegal(LVT))
- LVT = (MVT::ValueType)((unsigned)LVT - 1);
- assert(MVT::isInteger(LVT));
+ if (VT == MVT::iAny) {
+ if (AllowUnalign) {
+ VT = MVT::i64;
+ } else {
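+ // Pick the widest integer type the alignment guarantees: Align & 7 is
+ // the alignment mod 8, so 0 permits i64, 4 permits i32, 2 permits i16.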
+ switch (Align & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
- if (VT > LVT)
- VT = LVT;
+ MVT::ValueType LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::ValueType)((unsigned)LVT - 1);
+ assert(MVT::isInteger(LVT));
+
+ if (VT > LVT)
+ VT = LVT;
+ }
unsigned NumMemOps = 0;
while (Size != 0) {
unsigned VTSize = MVT::getSizeInBits(VT) / 8;
while (VTSize > Size) {
- VT = (MVT::ValueType)((unsigned)VT - 1);
- VTSize >>= 1;
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ if (MVT::isVector(VT)) {
+ VT = MVT::i64;
+ while (!TLI.isTypeLegal(VT))
+ VT = (MVT::ValueType)((unsigned)VT - 1);
+ VTSize = MVT::getSizeInBits(VT) / 8;
+ } else {
+ VT = (MVT::ValueType)((unsigned)VT - 1);
+ VTSize >>= 1;
+ }
}
- assert(MVT::isInteger(VT));
if (++NumMemOps > Limit)
return false;
static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
SDOperand Chain, SDOperand Dst,
SDOperand Src, uint64_t Size,
- unsigned Align,
- bool AlwaysInline,
- const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff) {
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Expand memcpy to a series of store ops if the size operand falls below
- // a certain threshold.
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
std::vector<MVT::ValueType> MemOps;
uint64_t Limit = -1;
if (!AlwaysInline)
Limit = TLI.getMaxStoresPerMemcpy();
- if (!MeetsMaxMemopRequirement(MemOps, Limit, Size, Align, TLI))
+ unsigned DstAlign = Align; // Destination alignment can change.
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ DAG, TLI))
return SDOperand();
- SmallVector<SDOperand, 8> OutChains;
-
- unsigned NumMemOps = MemOps.size();
- unsigned SrcDelta = 0;
- GlobalAddressSDNode *G = NULL;
std::string Str;
- bool CopyFromStr = false;
-
- if (Src.getOpcode() == ISD::GlobalAddress)
- G = cast<GlobalAddressSDNode>(Src);
- else if (Src.getOpcode() == ISD::ADD &&
- Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
- Src.getOperand(1).getOpcode() == ISD::Constant) {
- G = cast<GlobalAddressSDNode>(Src.getOperand(0));
- SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getValue();
- }
- if (G) {
- GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GV->isConstant()) {
- Str = GV->getStringValue(false);
- if (!Str.empty()) {
- CopyFromStr = true;
- SrcOff += SrcDelta;
- }
- }
- }
+ uint64_t SrcOff = 0, DstOff = 0;
+ bool CopyFromStr = isMemSrcFromString(Src, Str, SrcOff);
+ SmallVector<SDOperand, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
MVT::ValueType VT = MemOps[i];
unsigned VTSize = MVT::getSizeInBits(VT) / 8;
SDOperand Value, Store;
- if (CopyFromStr) {
+ if (CopyFromStr && !MVT::isVector(VT)) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constant pool first.
+ // FIXME: Handle cases where store of vector immediate is done in a
+ // single instruction.
Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
- Store =
- DAG.getStore(Chain, Value,
- getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstOff);
+ Store = DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff);
} else {
Value = DAG.getLoad(VT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcOff, false, Align);
- Store =
- DAG.getStore(Chain, Value,
- getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstOff, false, Align);
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ Store = DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
}
OutChains.push_back(Store);
SrcOff += VTSize;
&OutChains[0], OutChains.size());
}
+static SDOperand getMemmoveLoadsAndStores(SelectionDAG &DAG,
+ SDOperand Chain, SDOperand Dst,
+ SDOperand Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
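+ // All of the load operations are emitted before any of the stores, so
+ // that an overlapping source and destination behave as memmove requires.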
+ std::vector<MVT::ValueType> MemOps;
+ uint64_t Limit = -1;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemmove();
+ unsigned DstAlign = Align; // Destination alignment can change.
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
+ DAG, TLI))
+ return SDOperand();
+
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ SmallVector<SDOperand, 8> LoadValues;
+ SmallVector<SDOperand, 8> LoadChains;
+ SmallVector<SDOperand, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value;
+
+ Value = DAG.getLoad(VT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcSVOff + SrcOff, false, Align);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Store;
+
+ Store = DAG.getStore(Chain, LoadValues[i],
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstSVOff + DstOff, false, DstAlign);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
static SDOperand getMemsetStores(SelectionDAG &DAG,
SDOperand Chain, SDOperand Dst,
SDOperand Src, uint64_t Size,
unsigned Align,
- const Value *DstSV, uint64_t DstOff) {
+ const Value *DstSV, uint64_t DstSVOff) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Expand memset to a series of load/store ops if the size operand
// falls below a certain threshold.
std::vector<MVT::ValueType> MemOps;
- if (!MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
- Size, Align, TLI))
+ if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+ Size, Align, DAG, TLI))
return SDOperand();
SmallVector<SDOperand, 8> OutChains;
+ uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
SDOperand Value = getMemsetValue(Src, VT, DAG);
SDOperand Store = DAG.getStore(Chain, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstOff);
+ DstSV, DstSVOff + DstOff);
OutChains.push_back(Store);
DstOff += VTSize;
}
SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dst,
SDOperand Src, SDOperand Size,
unsigned Align, bool AlwaysInline,
- const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff) {
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
SDOperand Result =
getMemcpyLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
- Align, false, DstSV, DstOff, SrcSV, SrcOff);
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.Val)
return Result;
}
SDOperand Result =
TLI.EmitTargetCodeForMemcpy(*this, Chain, Dst, Src, Size, Align,
AlwaysInline,
- DstSV, DstOff, SrcSV, SrcOff);
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.Val)
return Result;
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, Chain, Dst, Src,
ConstantSize->getValue(), Align, true,
- DstSV, DstOff, SrcSV, SrcOff);
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
}
// Emit a library call.
SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
SDOperand Src, SDOperand Size,
unsigned Align,
- const Value *DstSV, uint64_t DstOff,
- const Value *SrcSV, uint64_t SrcOff) {
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
- // TODO: Optimize small memmove cases with simple loads and stores,
- // ensuring that all loads precede all stores. This can cause severe
- // register pressure, so targets should be careful with the size limit.
+ SDOperand Result =
+ getMemmoveLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
+ Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ if (Result.Val)
+ return Result;
+ }
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
SDOperand Result =
TLI.EmitTargetCodeForMemmove(*this, Chain, Dst, Src, Size, Align,
- DstSV, DstOff, SrcSV, SrcOff);
+ DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.Val)
return Result;
SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dst,
SDOperand Src, SDOperand Size,
unsigned Align,
- const Value *DstSV, uint64_t DstOff) {
+ const Value *DstSV, uint64_t DstSVOff) {
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
SDOperand Result =
getMemsetStores(*this, Chain, Dst, Src, ConstantSize->getValue(), Align,
- DstSV, DstOff);
+ DstSV, DstSVOff);
if (Result.Val)
return Result;
}
// code. If the target chooses to do this, this is the next best.
SDOperand Result =
TLI.EmitTargetCodeForMemset(*this, Chain, Dst, Src, Size, Align,
- DstSV, DstOff);
+ DstSV, DstSVOff);
if (Result.Val)
return Result;
SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
SDOperand Ptr, SDOperand Val,
MVT::ValueType VT) {
- assert((Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_SWAP)
+ assert(( Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_LSS
+ || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_LOAD_AND
+ || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR
+ || Opcode == ISD::ATOMIC_LOAD_MIN || Opcode == ISD::ATOMIC_LOAD_MAX
+ || Opcode == ISD::ATOMIC_LOAD_UMIN || Opcode == ISD::ATOMIC_LOAD_UMAX)
&& "Invalid Atomic Op");
SDVTList VTs = getVTList(Val.getValueType(), MVT::Other);
FoldingSetNodeID ID;
}
bool Indexed = AM != ISD::UNINDEXED;
- assert(Indexed || Offset.getOpcode() == ISD::UNDEF &&
+ assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
"Unindexed load with an offset!");
SDVTList VTs = Indexed ?
if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
return SDOperand(Existing, InN.ResNo);
- // Nope it doesn't. Remove the node from it's current place in the maps.
+ // Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
RemoveNodeFromCSEMaps(N);
/// opcode, types, and operands to the specified value. This should only be
/// used by the SelectionDAG class.
void SDNode::MorphNodeTo(unsigned Opc, SDVTList L,
- const SDOperand *Ops, unsigned NumOps) {
+ SDOperandPtr Ops, unsigned NumOps) {
NodeType = Opc;
ValueList = L.VTs;
NumValues = L.NumVTs;
RemoveNodeFromCSEMaps(N);
- N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, 0, 0);
+ N->MorphNodeTo(ISD::BUILTIN_OP_END+TargetOpc, VTs, SDOperandPtr(), 0);
CSEMap.InsertNode(N, IP);
return N;
HandleSDNode::~HandleSDNode() {
SDVTList VTs = { 0, 0 };
- MorphNodeTo(ISD::HANDLENODE, VTs, 0, 0); // Drops operand uses.
+ MorphNodeTo(ISD::HANDLENODE, VTs, SDOperandPtr(), 0); // Drops operand uses.
}
GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
case ISD::MEMBARRIER: return "MemBarrier";
case ISD::ATOMIC_LCS: return "AtomicLCS";
case ISD::ATOMIC_LAS: return "AtomicLAS";
- case ISD::ATOMIC_SWAP: return "AtomicSWAP";
+ case ISD::ATOMIC_LSS: return "AtomicLSS";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_SWAP: return "AtomicSWAP";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
case ISD::FGETSIGN: return "fgetsign";
case ISD::SETCC: return "setcc";
+ case ISD::VSETCC: return "vsetcc";
case ISD::SELECT: return "select";
case ISD::SELECT_CC: return "select_cc";
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";