[x86] Teach the new v4i32 shuffle lowering some more tricks to recognize

[oota-llvm.git] / lib / Target / NVPTX / NVPTXISelLowering.cpp
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp

index 258c57b17c59f7d9553159953e31714e692ebe31..e3d62354285ed671bdd5a2c33a48f91825c774fe 100644 (file)
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -48,6 +48,12 @@ static cl::opt<bool> sched4reg(
      "nvptx-sched4reg",
      cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
  
+static cl::opt<unsigned>
+FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
+                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+                             " 1: do it  2: do it aggressively"),
+                    cl::init(2)); // default 2. NOTE(review): desc text never closes its parenthesis
+
  static bool IsPTXVectorType(MVT VT) {
    switch (VT.SimpleTy) {
    default:
@@ -100,7 +106,7 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
  }
  
  // NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
      : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
        nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
  
@@ -197,8 +203,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  
    // Turn FP extload into load/fextend
+  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
    // Turn FP truncstore into trunc + store.
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  
    // PTX does not support load / store predicate registers
@@ -1442,8 +1451,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
        EVT ObjectVT = getValueType(retTy);
        unsigned NumElts = ObjectVT.getVectorNumElements();
        EVT EltVT = ObjectVT.getVectorElementType();
-      assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(),
-                                                        ObjectVT) == NumElts &&
+      assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
+                 F->getContext(), ObjectVT) == NumElts &&
               "Vector was not scalarized");
        unsigned sz = EltVT.getSizeInBits();
        bool needTruncate = sz < 8 ? true : false;
@@ -2019,7 +2028,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
  
    const Function *F = MF.getFunction();
    const AttributeSet &PAL = F->getAttributes();
-  const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
+  const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
  
    SDValue Root = DAG.getRoot();
    std::vector<SDValue> OutChains;
@@ -2133,7 +2142,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
                                       ISD::SEXTLOAD : ISD::ZEXTLOAD;
              p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
                                 MachinePointerInfo(srcValue), partVT, false,
-                               false, partAlign);
+                               false, false, partAlign);
            } else {
              p = DAG.getLoad(partVT, dl, Root, srcAddr,
                              MachinePointerInfo(srcValue), false, false, false,
@@ -2266,6 +2275,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
                                         ISD::SEXTLOAD : ISD::ZEXTLOAD;
          p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
                             MachinePointerInfo(srcValue), ObjectVT, false, false,
+                           false,
          TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
        } else {
          p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
@@ -3260,16 +3270,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
      Info.vol = 0;
      Info.readMem = true;
      Info.writeMem = false;
-
-    // alignment is available as metadata.
-    // Grab it and set the alignment.
-    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
-    MDNode *AlignMD = I.getMetadata("align");
-    assert(AlignMD && "Must have a non-null MDNode");
-    assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
-    Value *Align = AlignMD->getOperand(0);
-    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
-    Info.align = Alignment;
+    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
  
      return true;
    }
@@ -3289,16 +3290,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
      Info.vol = 0;
      Info.readMem = true;
      Info.writeMem = false;
-
-    // alignment is available as metadata.
-    // Grab it and set the alignment.
-    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
-    MDNode *AlignMD = I.getMetadata("align");
-    assert(AlignMD && "Must have a non-null MDNode");
-    assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
-    Value *Align = AlignMD->getOperand(0);
-    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
-    Info.align = Alignment;
+    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
  
      return true;
    }
@@ -3799,7 +3791,31 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  //                         NVPTX DAG Combining
  //===----------------------------------------------------------------------===//
  
-extern unsigned FMAContractLevel;
+bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
+                                   CodeGenOpt::Level OptLevel) const {
+  const Function *F = MF.getFunction();
+  const TargetOptions &TO = MF.getTarget().Options;
+
+  // Is FMA contraction allowed? An explicit -nvptx-fma-level always wins.
+  if (FMAContractLevelOpt.getNumOccurrences() > 0) {
+    return FMAContractLevelOpt > 0; // any nonzero level allows contraction
+  } else if (OptLevel == 0) {
+    // Flag not given and OptLevel is 0 (not optimizing): never contract
+    return false;
+  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
+    // TargetOptions asks for fast FP-op fusion or unsafe FP math: contract
+    return true;
+  } else if (F->hasFnAttribute("unsafe-fp-math")) {
+    // Per-function attribute (coming from Clang): value must be exactly true
+    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+    StringRef Val = Attr.getValueAsString();
+    if (Val == "true")
+      return true;
+  }
+
+  // No clear indication that fusion is allowed (attribute absent or not true)
+  return false;
+}
  
  /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
  /// operands N0 and N1.  This is a helper for PerformADDCombine that is
@@ -3833,7 +3849,9 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
    }
    else if (N0.getOpcode() == ISD::FMUL) {
      if (VT == MVT::f32 || VT == MVT::f64) {
-      if (FMAContractLevel == 0)
+      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
+          &DAG.getTargetLoweringInfo());
+      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
          return SDValue();
  
        // For floating point:
@@ -4018,13 +4036,13 @@ static bool IsMulWideOperandDemotable(SDValue Op,
    if (Op.getOpcode() == ISD::SIGN_EXTEND ||
        Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
      EVT OrigVT = Op.getOperand(0).getValueType();
-    if (OrigVT.getSizeInBits() == OptSize) {
+    if (OrigVT.getSizeInBits() <= OptSize) {
        S = Signed;
        return true;
      }
    } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      EVT OrigVT = Op.getOperand(0).getValueType();
-    if (OrigVT.getSizeInBits() == OptSize) {
+    if (OrigVT.getSizeInBits() <= OptSize) {
        S = Unsigned;
        return true;
      }
@@ -4178,8 +4196,7 @@ static SDValue PerformSHLCombine(SDNode *N,
  
  SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
-  // FIXME: Get this from the DAG somehow
-  CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
    switch (N->getOpcode()) {
      default: break;
      case ISD::ADD: