#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
- void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType);
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
- SmallVector<SDValue, 8> &Aliases);
+ SmallVectorImpl<SDValue> &Aliases);
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
return N1;
+ // We require a splat of the entire scalar bit width for non-contiguous
+ // bit patterns.
+ bool IsFullSplat =
+ ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1 == 1)
+ if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2())
+ if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(ConstValue1.logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
}
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
+ cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
+ (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ cast<ConstantSDNode>(RR)->isNullValue()))) {
+ SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
+ LL, DAG.getConstant(1, LL.getValueType()));
+ AddToWorkList(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(N), VT, ADDNode,
+ DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ }
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
Op1 = ISD::getSetCCSwappedOperands(Op1);
}
}
+ // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+ if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode())
+ return BSwap;
+ }
+
return SDValue();
}
if (N00 != N10)
return SDValue();
- // Make sure everything beyond the low halfword is zero since the SRL 16
- // will clear the top bits.
+ // Make sure everything beyond the low halfword gets set to zero since the SRL
+ // 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
- if (DemandHighBits && OpSizeInBits > 16 &&
- (!LookPassAnd0 || !LookPassAnd1) &&
- !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
- return SDValue();
+ if (DemandHighBits && OpSizeInBits > 16) {
+ // If the left-shift isn't masked out then the only way this is a bswap is
+ // if all bits beyond the low 8 are 0. In that case the entire pattern
+ // reduces to a left shift anyway: leave it for other parts of the combiner.
+ if (!LookPassAnd0)
+ return SDValue();
+
+ // However, if the right shift isn't masked out then it might be because
+ // it's not needed. See if we can spot that too.
+ if (!LookPassAnd1 &&
+ !DAG.MaskedValueIsZero(
+ N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+ return SDValue();
+ }
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
if (OpSizeInBits > 16)
/// isBSwapHWordElement - Return true if the specified node is an element
/// that makes up a 32-bit packed halfword byteswap. i.e.
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
-static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
if (!N.getNode()->hasOneUse())
return false;
unsigned OpSizeInBits = VT.getSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
+ // fold (or (shl (*ext x), (*ext y)),
+ // (srl (*ext x), (*ext (sub 32, y)))) ->
+ // (*ext (rotl x, y))
+ // fold (or (shl (*ext x), (*ext y)),
+ // (srl (*ext x), (*ext (sub 32, y)))) ->
+ // (*ext (rotr x, (sub 32, y)))
+ SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
+ EVT LArgVT = LArgExtOp0.getValueType();
+ if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
+ SDValue V = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
+ LArgExtOp0,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt);
+ return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
+ }
+ }
} else if (LExtOp0.getOpcode() == ISD::SUB &&
RExtOp0 == LExtOp0.getOperand(1)) {
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
LHSShiftArg,
HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+ else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
+ // fold (or (shl (*ext x), (*ext (sub 32, y))),
+ // (srl (*ext x), (*ext y))) ->
+ // (*ext (rotl x, y))
+ // fold (or (shl (*ext x), (*ext (sub 32, y))),
+ // (srl (*ext x), (*ext y))) ->
+ // (*ext (rotr x, (sub 32, y)))
+ SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
+ EVT RArgVT = RArgExtOp0.getValueType();
+ if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
+ SDValue V = DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
+ RArgExtOp0,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt);
+ return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
+ }
+ }
}
}
// mentioned transformation is profitable.
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
unsigned ExtOpc,
- SmallVector<SDNode*, 4> &ExtendNodes,
+ SmallVectorImpl<SDNode *> &ExtendNodes,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
return true;
}
-void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- EVT IndexTy = N0->getOperand(1).getValueType();
+ EVT IndexTy = TLI.getVectorIdxTy();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
- if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
- (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
N0.getNode()->hasOneUse() && VT.isInteger() &&
!VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
&DAG.getTarget().Options))
return GetNegatedExpression(N11, DAG, LegalOperations);
-
+
if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
&DAG.getTarget().Options))
return GetNegatedExpression(N10, DAG, LegalOperations);
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
}
}
- if (CombinerAA) {
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
static BaseIndexOffset match(SDValue Ptr) {
bool IsIndexSignExt = false;
- // Just Base or possibly anything else.
+ // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+ // instruction, then it could be just the BASE or everything else we don't
+ // know how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
- // Base + offset.
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
}
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);
SDValue IndexOffset = Ptr->getOperand(1);
// transform should not be done in this case.
if (Value.getOpcode() != ISD::TargetConstantFP) {
SDValue Tmp;
- switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
case MVT::f80:
if (NewST.getNode())
return NewST;
- if (CombinerAA) {
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
SmallVector<SDValue, 8> Ops;
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ // Do not combine these two vectors if the output vector will not replace
+ // the input vector.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.getOpcode() == ISD::UNDEF) {
OrigElt -= NumElem;
}
- EVT IndexTy = N->getOperand(1).getValueType();
+ EVT IndexTy = TLI.getVectorIdxTy();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
InVec, DAG.getConstant(OrigElt, IndexTy));
}
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= 0) {
- if (Idx < (int)NumElts)
- Idx += NumElts;
- else
+ if (Idx >= (int)NumElts)
Idx -= NumElts;
+ else
+ Idx = -1; // remove reference to lhs
}
NewMask.push_back(Idx);
}
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
Cond, One, Zero);
AddToWorkList(CstOffset.getNode());
- CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
return false;
}
- if (CombinerGlobalAA) {
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA && SrcValue1 && SrcValue2) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
- SmallVector<SDValue, 8> &Aliases) {
+ SmallVectorImpl<SDValue> &Aliases) {
SmallVector<SDValue, 8> Chains; // List of chains to visit.
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.