#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <set>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
//------------------------------ DAGCombiner ---------------------------------//
- class VISIBILITY_HIDDEN DAGCombiner {
+ class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
- APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
-class VISIBILITY_HIDDEN WorkListRemover :
- public SelectionDAG::DAGUpdateListener {
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
To[0].getNode()->dump(&DAG);
errs() << " and " << NumTo-1 << " other values\n";
for (unsigned i = 0, e = NumTo; i != e; ++i)
- assert(N->getValueType(i) == To[i].getValueType() &&
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
N0.getOpcode() == ISD::SIGN_EXTEND ||
(N0.getOpcode() == ISD::TRUNCATE &&
!TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+ !VT.isVector() &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
(!LegalOperations ||
TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
}
// fold (or x, undef) -> -1
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
// fold (or c1, c2) -> c1|c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (shl c1, c2) -> c1<<c2
if (N0C && N1C)
return N0;
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(VT.getSizeInBits())))
+ APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (sra c1, c2) -> (sra c1, c2)
if (N0C && N1C)
if (N0C && N0C->isAllOnesValue())
return N0;
// fold (sra x, (setge c, size(x))) -> undef
- if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
- unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
if (N1C && N0.getOpcode() == ISD::SRA) {
if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(Sum, N1C->getValueType(0)));
}
const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
- unsigned VTValSize = VT.getSizeInBits();
EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue());
+ EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
if (N0C && N1C)
else if (Op.getValueType().bitsGT(VT))
Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
- DAG.getValueType(N0.getValueType()));
+ DAG.getValueType(N0.getValueType().getScalarType()));
}
}
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
}
- return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
X, DAG.getConstant(Mask, VT));
}
+ // Fold (zext (and x, cst)) -> (and (zext x), cst)
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N0.getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N0.getOperand(0).hasOneUse()) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0)),
+ DAG.getConstant(Mask, VT));
+ }
+
// fold (zext (load x)) -> (zext (truncate (zextload x)))
if (ISD::isNON_EXTLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
if (SCC.getNode()) return SCC;
}
+ // (zext (shl (zext x), y)) -> (shl (zext x), (zext y))
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ DebugLoc dl = N->getDebugLoc();
+ return DAG.getNode(N0.getOpcode(), dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1)));
+ }
+
return SDValue();
}
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
- unsigned VTBits = VT.getSizeInBits();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
// fold (sext_in_reg c1) -> c1
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getSizeInBits() < EVTBits)
+ if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
}
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1));
}
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
- const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
// If one is volatile it might be ok, but play conservative and bail out.
!LD1->isVolatile() &&
!LD2->isVolatile() &&
- TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- // never taken branch, fold to chain
- if (N1C && N1C->isNullValue())
- return Chain;
- // unconditional branch
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
false);
if (Simp.getNode()) AddToWorkList(Simp.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
-
- // fold br_cc true, dest -> br dest (unconditional branch)
- if (SCCC && !SCCC->isNullValue())
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
- N->getOperand(0), N->getOperand(4));
- // fold br_cc false, dest -> unconditional fall through
- if (SCCC && SCCC->isNullValue())
- return N->getOperand(0);
-
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
return false;
}
-/// InferAlignment - If we can infer some alignment information from this
-/// pointer, return it.
-static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
- // If this is a direct reference to a stack slot, use information about the
- // stack slot's alignment.
- int FrameIdx = 1 << 31;
- int64_t FrameOffset = 0;
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
- FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
- isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
- FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
- FrameOffset = Ptr.getConstantOperandVal(1);
- }
-
- if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
- const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
- if (MFI.isFixedObjectIndex(FrameIdx)) {
- int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
- // The alignment of the frame index can be determined from its offset from
- // the incoming frame position. If the frame object is at offset 32 and
- // the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned.
- unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
- // Finally, the frame object itself may have a known alignment. Factor
- // the alignment + offset into a new alignment. For example, if we know
- // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
- // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
- // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
- unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
- FrameOffset);
- return std::max(Align, FIInfoAlign);
- }
- }
-
- return 0;
-}
-
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
- if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
LD->getValueType(0),
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
- if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
Ptr, ST->getSrcValue(),
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
APInt::getLowBitsSet(
- Value.getValueSizeInBits(),
+ Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getSizeInBits())))
return SDValue(N, 0);
}
// If this is an EXTLOAD, the VT's must match.
if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
- // FIXME: this conflates two src values, discarding one. This is not
- // the right thing to do, but nothing uses srcvalues now. When they do,
- // turn SrcValue into a list of locations.
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations.
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
// Check that the condition doesn't reach either load. If so, folding
// this will induce a cycle into the DAG.
- if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) {
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
} else {
// Check that the condition doesn't reach either load. If so, folding
// this will induce a cycle into the DAG.
- if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) {
+ if ((!LLD->hasAnyUseOfValue(1) ||
+ (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+ (!RLD->hasAnyUseOfValue(1) ||
+ (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+ !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0),
Load = DAG.getLoad(TheSelect->getValueType(0),
TheSelect->getDebugLoc(),
LLD->getChain(),
- Addr,LLD->getSrcValue(),
- LLD->getSrcValueOffset(),
+ Addr, 0, 0,
LLD->isVolatile(),
LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType(),
TheSelect->getDebugLoc(),
TheSelect->getValueType(0),
- LLD->getChain(), Addr, LLD->getSrcValue(),
- LLD->getSrcValueOffset(),
+ LLD->getChain(), Addr, 0, 0,
LLD->getMemoryVT(),
LLD->isVolatile(),
LLD->getAlignment());