[X86] Make wide loads be managed by AtomicExpand

author Robin Morisset <morisset@google.com>

Tue, 23 Sep 2014 20:59:25 +0000 (20:59 +0000)

committer Robin Morisset <morisset@google.com>

Tue, 23 Sep 2014 20:59:25 +0000 (20:59 +0000)
author Robin Morisset <morisset@google.com>
Tue, 23 Sep 2014 20:59:25 +0000 (20:59 +0000)
committer Robin Morisset <morisset@google.com>
Tue, 23 Sep 2014 20:59:25 +0000 (20:59 +0000)
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp

index 3225731c05ccf33ee5df337bdf0d984676133b32..dd532294f44c6c3fa7bcc43ece94b31b3f96b33c 100644 (file)
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -44,6 +44,8 @@ namespace {
      bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                 bool IsStore, bool IsLoad);
      bool expandAtomicLoad(LoadInst *LI);
+    bool expandAtomicLoadToLL(LoadInst *LI);
+    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
      bool expandAtomicStore(StoreInst *SI);
      bool expandAtomicRMW(AtomicRMWInst *AI);
      bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
@@ -160,6 +162,15 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
  }
  
  bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
+   if (TM->getSubtargetImpl()
+          ->getTargetLowering()
+          ->hasLoadLinkedStoreConditional())
+    return expandAtomicLoadToLL(LI);
+  else
+    return expandAtomicLoadToCmpXchg(LI);
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
    auto TLI = TM->getSubtargetImpl()->getTargetLowering();
    IRBuilder<> Builder(LI);
  
@@ -175,6 +186,24 @@ bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
    return true;
  }
  
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+  IRBuilder<> Builder(LI);
+  AtomicOrdering Order = LI->getOrdering();
+  Value *Addr = LI->getPointerOperand();
+  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+  Constant *DummyVal = Constant::getNullValue(Ty);
+
+  Value *Pair = Builder.CreateAtomicCmpXchg(
+      Addr, DummyVal, DummyVal, Order,
+      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+  LI->replaceAllUsesWith(Loaded);
+  LI->eraseFromParent();
+
+  return true;
+}
+
  bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
    // This function is only called on atomic stores that are too large to be
    // atomic if implemented as a native store. So we replace them by an
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a15ad4e936ce4b454abf3e6a666b486045bfd9be..b7c1c1d8be5834eaf80cafbf4dc4904b42822a03 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17477,8 +17477,11 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
    return needsCmpXchgNb(SI->getValueOperand()->getType());
  }
  
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *SI) const {
-  return false; // FIXME, currently these are expanded separately in this file.
+// Note: this turns large loads into lock cmpxchg8b/16b.
+// FIXME: On 32-bit x86, fild/movq might be faster than lock cmpxchg8b.
+bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+  auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+  return needsCmpXchgNb(PTy->getElementType());
  }
  
  bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
@@ -17855,29 +17858,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    }
  }
  
-static void ReplaceATOMIC_LOAD(SDNode *Node,
-                               SmallVectorImpl<SDValue> &Results,
-                               SelectionDAG &DAG) {
-  SDLoc dl(Node);
-  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
-
-  // Convert wide load -> cmpxchg8b/cmpxchg16b
-  // FIXME: On 32-bit, load -> fild or movq would be more efficient
-  //        (The only way to get a 16-byte load is cmpxchg16b)
-  // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
-  SDValue Zero = DAG.getConstant(0, VT);
-  SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
-  SDValue Swap =
-      DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
-                           Node->getOperand(0), Node->getOperand(1), Zero, Zero,
-                           cast<AtomicSDNode>(Node)->getMemOperand(),
-                           cast<AtomicSDNode>(Node)->getOrdering(),
-                           cast<AtomicSDNode>(Node)->getOrdering(),
-                           cast<AtomicSDNode>(Node)->getSynchScope());
-  Results.push_back(Swap.getValue(0));
-  Results.push_back(Swap.getValue(2));
-}
-
  /// ReplaceNodeResults - Replace a node with an illegal result type
  /// with a new node built out of custom code.
  void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@@ -18036,12 +18016,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    case ISD::ATOMIC_LOAD_MAX:
    case ISD::ATOMIC_LOAD_UMIN:
    case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD: {
      // Delegate to generic TypeLegalization. Situations we can really handle
      // should have already been dealt with by AtomicExpandPass.cpp.
      break;
-  case ISD::ATOMIC_LOAD: {
-    ReplaceATOMIC_LOAD(N, Results, DAG);
-    return;
    }
    case ISD::BITCAST: {
      assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll

index 7352d5a5800614a27bb49907936540bb20d7e511..ad1a5c6d02679b5ffb8c84a0fe2a1954b8e83ea0 100644 (file)
--- a/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -4,16 +4,18 @@
  ; FIXME: The generated code can be substantially improved.
  
  define void @test1(i64* %ptr, i64 %val1) {
-; CHECK: test1
-; CHECK: cmpxchg8b
+; CHECK-LABEL: test1
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
  ; CHECK-NEXT: jne
    store atomic i64 %val1, i64* %ptr seq_cst, align 8
    ret void
  }
  
  define i64 @test2(i64* %ptr) {
-; CHECK: test2
-; CHECK: cmpxchg8b
+; CHECK-LABEL: test2
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
    %val = load atomic i64* %ptr seq_cst, align 8
    ret i64 %val
  }
author	Robin Morisset <morisset@google.com>
	Tue, 23 Sep 2014 20:59:25 +0000 (20:59 +0000)
committer	Robin Morisset <morisset@google.com>
	Tue, 23 Sep 2014 20:59:25 +0000 (20:59 +0000)
lib/CodeGen/AtomicExpandPass.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/atomic-load-store-wide.ll		patch \| blob \| history