Revert "Revert Don't create new comdats in CodeGen"

[oota-llvm.git] / lib / Target / X86 / X86ISelDAGToDAG.cpp
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index f72c70c58c10858a5f70e364f209ab3ec95922d1..7fe8e8b344aa58372646ab0be10373b2be33cd44 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -238,10 +238,10 @@ namespace {
      inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                     SDValue &Scale, SDValue &Index,
                                     SDValue &Disp, SDValue &Segment) {
-      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
-        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
-                                    getTargetLowering()->getPointerTy()) :
-        AM.Base_Reg;
+      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+                 ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
+                                               TLI->getPointerTy())
+                 : AM.Base_Reg;
        Scale = getI8Imm(AM.Scale);
        Index = AM.IndexReg;
        // These are 32-bit even in 64-bit mode since RIP relative offset
@@ -518,7 +518,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
      // If the source and destination are SSE registers, then this is a legal
      // conversion that should not be lowered.
      const X86TargetLowering *X86Lowering =
-        static_cast<const X86TargetLowering *>(getTargetLowering());
+        static_cast<const X86TargetLowering *>(TLI);
      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
      if (SrcIsSSE && DstIsSSE)
@@ -1572,8 +1572,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
  ///
  SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
    unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
-  return CurDAG->getRegister(GlobalBaseReg,
-                             getTargetLowering()->getPointerTy()).getNode();
+  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
  }
  
  /// Atomic opcode table
@@ -1707,7 +1706,8 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
  static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
                                                  SDLoc dl,
                                                  enum AtomicOpc &Op, MVT NVT,
-                                                SDValue Val) {
+                                                SDValue Val,
+                                                const X86Subtarget *Subtarget) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
      int64_t CNVal = CN->getSExtValue();
      // Quit if not 32-bit imm.
@@ -1722,7 +1722,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
      // For atomic-load-add, we could do some optimizations.
      if (Op == ADD) {
        // Translate to INC/DEC if ADD by 1 or -1.
-      if ((CNVal == 1) || (CNVal == -1)) {
+      if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
          Op = (CNVal == 1) ? INC : DEC;
          // No more constant operand after being translated into INC/DEC.
          return SDValue();
@@ -1794,7 +1794,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
        break;
    }
  
-  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
    bool isUnOp = !Val.getNode();
    bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
  
@@ -1891,8 +1891,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
        case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
        case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
        case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
-      case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
-      case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
+      case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
+      case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
        case X86::CMOVA16rr: case X86::CMOVA16rm:
        case X86::CMOVA32rr: case X86::CMOVA32rm:
        case X86::CMOVA64rr: case X86::CMOVA64rm:
@@ -2131,6 +2131,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
    case X86ISD::GlobalBaseReg:
      return getGlobalBaseReg();
  
+  case X86ISD::SHRUNKBLEND: {
+    // SHRUNKBLEND selects like a regular VSELECT.
+    SDValue VSelect = CurDAG->getNode(
+        ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
+        Node->getOperand(1), Node->getOperand(2));
+    ReplaceUses(SDValue(Node, 0), VSelect);
+    SelectCode(VSelect.getNode());
+    // We already called ReplaceUses.
+    return nullptr;
+  }
  
    case ISD::ATOMIC_LOAD_XOR:
    case ISD::ATOMIC_LOAD_AND:
@@ -2218,6 +2228,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
      return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
                                  getI8Imm(ShlVal));
    }
+  case X86ISD::UMUL8:
+  case X86ISD::SMUL8: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
+    Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
+
+    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
+                                          N0, SDValue()).getValue(1);
+
+    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
+    SDValue Ops[] = {N1, InFlag};
+    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+
+    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+    return nullptr;
+  }
+
    case X86ISD::UMUL: {
      SDValue N0 = Node->getOperand(0);
      SDValue N1 = Node->getOperand(1);
@@ -2393,11 +2422,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
    }
  
    case ISD::SDIVREM:
-  case ISD::UDIVREM: {
+  case ISD::UDIVREM:
+  case X86ISD::SDIVREM8_SEXT_HREG:
+  case X86ISD::UDIVREM8_ZEXT_HREG: {
      SDValue N0 = Node->getOperand(0);
      SDValue N1 = Node->getOperand(1);
  
-    bool isSigned = Opcode == ISD::SDIVREM;
+    bool isSigned = (Opcode == ISD::SDIVREM ||
+                     Opcode == X86ISD::SDIVREM8_SEXT_HREG);
      if (!isSigned) {
        switch (NVT.SimpleTy) {
        default: llvm_unreachable("Unsupported VT!");
@@ -2472,7 +2504,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
            SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
        } else {
          // Zero out the high part, effectively zero extending the input.
-        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);       
+        SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
          switch (NVT.SimpleTy) {
          case MVT::i16:
            ClrNode =
@@ -2513,33 +2545,43 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
          SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
      }
  
-    // Prevent use of AH in a REX instruction by referencing AX instead.
-    // Shift it down 8 bits.
+    // Prevent use of AH in a REX instruction by explicitly copying it to
+    // an ABCD_L register.
      //
      // The current assumption of the register allocator is that isel
-    // won't generate explicit references to the GPR8_NOREX registers. If
+    // won't generate explicit references to the GR8_ABCD_H registers. If
      // the allocator and/or the backend get enhanced to be more robust in
      // that regard, this can be, and should be, removed.
-    if (HiReg == X86::AH && Subtarget->is64Bit() &&
-        !SDValue(Node, 1).use_empty()) {
-      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                              X86::AX, MVT::i16, InFlag);
-      InFlag = Result.getValue(2);
-
-      // If we also need AL (the quotient), get it by extracting a subreg from
-      // Result. The fast register allocator does not like multiple CopyFromReg
-      // nodes using aliasing registers.
-      if (!SDValue(Node, 0).use_empty())
-        ReplaceUses(SDValue(Node, 0),
-          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
-
-      // Shift AX right by 8 bits instead of using AH.
-      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
-                                         Result,
-                                         CurDAG->getTargetConstant(8, MVT::i8)),
-                       0);
-      ReplaceUses(SDValue(Node, 1),
-        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+    if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
+      SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
+      unsigned AHExtOpcode =
+          isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
+
+      SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
+                                             MVT::Glue, AHCopy, InFlag);
+      SDValue Result(RNode, 0);
+      InFlag = SDValue(RNode, 1);
+
+      if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
+          Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
+        if (Node->getValueType(1) == MVT::i64) {
+          // It's not possible to directly movsx AH to a 64bit register, because
+          // the latter needs the REX prefix, but the former can't have it.
+          assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
+                 "Unexpected i64 sext of h-register");
+          Result =
+              SDValue(CurDAG->getMachineNode(
+                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+                          CurDAG->getTargetConstant(0, MVT::i64), Result,
+                          CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+                      0);
+        }
+      } else {
+        Result =
+            CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
+      }
+      ReplaceUses(SDValue(Node, 1), Result);
+      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
      }
      // Copy the division (low) result, if it is needed.
      if (!SDValue(Node, 0).use_empty()) {