"nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
+static cl::opt<unsigned>
+FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
+
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default:
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//
-extern unsigned FMAContractLevel;
+bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
+ CodeGenOpt::Level OptLevel) const {
+ const Function *F = MF.getFunction();
+ const TargetOptions &TO = MF.getTarget().Options;
+
+ // Always honor command-line argument
+ if (FMAContractLevelOpt.getNumOccurrences() > 0) {
+ return FMAContractLevelOpt > 0;
+ } else if (OptLevel == 0) {
+ // Do not contract if we're not optimizing the code
+ return false;
+ } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
+ // Honor TargetOptions flags that explicitly say fusion is okay
+ return true;
+ } else if (F->hasFnAttribute("unsafe-fp-math")) {
+ // Check for unsafe-fp-math=true coming from Clang
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ return true;
+ }
+
+ // We did not have a clear indication that fusion is allowed, so assume not
+ return false;
+}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
}
else if (N0.getOpcode() == ISD::FMUL) {
if (VT == MVT::f32 || VT == MVT::f64) {
- if (FMAContractLevel == 0)
+ NVPTXTargetLowering *TLI =
+ (NVPTXTargetLowering *)&DAG.getTargetLoweringInfo();
+ if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
return SDValue();
// For floating point:
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- // FIXME: Get this from the DAG somehow
- CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
switch (N->getOpcode()) {
default: break;
case ISD::ADD: