/// \return True if some memory operations were changed.
bool MergeConsecutiveStores(StoreSDNode *N);
+ /// \brief Try to transform a truncation where C is a constant:
+ /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
+ ///
+ /// \p N must be a truncation whose first operand is an AND. The remaining
+ /// requirements (e.g. that the trunc is single-use) are checked by the
+ /// function; if they are not met, an empty SDValue is returned.
+ SDValue distributeTruncateThroughAnd(SDNode *N);
+
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
return N0;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N1;
+
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+ // Do this only if the resulting shuffle is legal.
+ if (isa<ShuffleVectorSDNode>(N0) &&
+ isa<ShuffleVectorSDNode>(N1) &&
+ N0->getOperand(1) == N1->getOperand(1) &&
+ ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
+ bool CanFold = true;
+ unsigned NumElts = VT.getVectorNumElements();
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ // We construct two shuffle masks:
+ // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
+ // and N1 as the second operand.
+ // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
+ // and N0 as the second operand.
+ // We do this because OR is commutable and therefore there might be
+ // two ways to fold this node into a shuffle.
+ SmallVector<int,4> Mask1;
+ SmallVector<int,4> Mask2;
+
+ for (unsigned i = 0; i != NumElts && CanFold; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Both shuffle indexes are undef. Propagate Undef.
+ if (M0 < 0 && M1 < 0) {
+ Mask1.push_back(M0);
+ Mask2.push_back(M0);
+ continue;
+ }
+
+ if (M0 < 0 || M1 < 0 ||
+ (M0 < (int)NumElts && M1 < (int)NumElts) ||
+ (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
+ CanFold = false;
+ break;
+ }
+
+ Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
+ Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
+ }
+
+ if (CanFold) {
+ // Fold this sequence only if the resulting shuffle is 'legal'.
+ if (TLI.isShuffleMaskLegal(Mask1, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
+ N1->getOperand(0), &Mask1[0]);
+ if (TLI.isShuffleMaskLegal(Mask2, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
+ N0->getOperand(0), &Mask2[0]);
+ }
+ }
}
// fold (or x, undef) -> -1
return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+ // With N and its AND operand both single-use and a constant mask, rewrite:
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+ // The fold applies only when the AND's second operand is a constant.
+ if (ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01)) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ // TruncC now has TruncVT's scalar bit width; emit the AND in the narrow type.
+ return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
+ DAG.getConstant(TruncC, TruncVT));
+ }
+ }
+ // One-use or constant-operand check failed: return an empty SDValue.
+ return SDValue();
+}
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getSizeInBits());
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N),
- TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
// fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getSizeInBits());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N),
- TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
// fold operands of srl based on knowledge that the low bits are not
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the inputs.
SDValue Lo, Hi, LL, LH, RL, RH;
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
return SDValue();
SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+ std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
// creates this pattern) and before operation legalization after which
// we need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- LegalTypes && !LegalOperations && N0->hasOneUse()) {
+ LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
};
}
-/// \brief Sorts LoadedSlice according to their offset.
-struct LoadedSliceSorter {
- bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
- assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
- return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
- }
-};
-
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+ std::sort(LoadedSlices.begin(), LoadedSlices.end(),
+ [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. second) potential candidate
// to be placed in a paired load.
unsigned SequenceNum;
};
-/// Sorts store nodes in a link according to their offset from a shared
-// base ptr.
-struct ConsecutiveMemoryChainSorter {
- bool operator()(MemOpLink LHS, MemOpLink RHS) {
- return
- LHS.OffsetFromBase < RHS.OffsetFromBase ||
- (LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum > RHS.SequenceNum);
- }
-};
-
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
// Sort the memory operands according to their distance from the base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
- ConsecutiveMemoryChainSorter());
+ [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase ||
+ (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+ LHS.SequenceNum > RHS.SequenceNum);
+ });
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.