SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorkList(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
- N1.getOperand(0));
+ N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeVectorOps) {
+ && Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
- CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
}
// Fold the AND away, taking care not to fold to the old load node if we
}
}
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
+
return SDValue();
}
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
if (FoldedVOp.getNode()) return FoldedVOp;
}
- // fold (fadd c1, c2) -> (fadd c1, c2)
+ // fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma x, y, z)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// fold vector ops
if (VT.isVector()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
}
}
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+
+ // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+
return SDValue();
}
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desireable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0,, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desireable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
SDValue Tmp;
switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP type");
- case MVT::f80: // We don't do this for these yet.
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
break;
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
// using shuffles.
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
- bool AllUndef = true;
+
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
- AllUndef = false;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
AllAnyExt &= AnyExt;
}
- if (AllUndef)
- return DAG.getUNDEF(VT);
-
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If the element type of the input vector is not the same as
+ // the output element type, make concat_vectors based on input element
+ // type and then bitcast it to the output vector type.
+ //
+ // In another words avoid nodes like this:
+ // <NODE> v16i8 = concat_vectors v4i16 v4i16
+ // Replace it with this one:
+ // <NODE0> v8i16 = concat_vectors v4i16 v4i16
+ // <NODE1> v16i8 = bitcast NODE0
+ EVT ItemType = VecIn1.getValueType().getVectorElementType();
+ if (ItemType != VT.getVectorElementType()) {
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+ ItemType,
+ VecIn1.getValueType().getVectorNumElements()*2);
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+ } else
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+
}
// If VecIn2 is unused then change it to undef.
if (N->getNumOperands() == 1)
return N->getOperand(0);
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
return SDValue();
}