From ca396e391e13d417605ebed06780d92c88f14a6b Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Tue, 11 Mar 2014 10:48:52 +0000
Subject: [PATCH] IR: add a second ordering operand to cmpxhg for failure

The syntax for "cmpxchg" should now look something like:

	cmpxchg i32* %addr, i32 42, i32 3 acquire monotonic

where the second ordering argument gives the required semantics in the case
that no exchange takes place. It should be no stronger than the first ordering
constraint and cannot be either "release" or "acq_rel" (since no store will
have taken place).

rdar://problem/15996804

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203559 91177308-0d34-0410-b5e6-96231b3b80d8
---
 docs/LangRef.rst                              |  20 +++--
 include/llvm/CodeGen/SelectionDAG.h           |  14 ++-
 include/llvm/CodeGen/SelectionDAGNodes.h      |  50 ++++++++---
 include/llvm/IR/IRBuilder.h                   |  11 ++-
 include/llvm/IR/Instructions.h                |  53 ++++++++++--
 lib/Analysis/AliasAnalysis.cpp                |   2 +-
 lib/AsmParser/LLParser.cpp                    |  27 ++++--
 lib/AsmParser/LLParser.h                      |   1 +
 lib/Bitcode/Reader/BitcodeReader.cpp          |  20 +++--
 lib/Bitcode/Writer/BitcodeWriter.cpp          |   4 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp      |   1 +
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |   9 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp     |  28 ++++--
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  10 ++-
 lib/IR/AsmWriter.cpp                          |  34 +++++++-
 lib/IR/AsmWriter.h                            |   3 +
 lib/IR/Instruction.cpp                        |  10 ++-
 lib/IR/Instructions.cpp                       |  27 ++++--
 lib/IR/Verifier.cpp                           |  17 +++-
 lib/Target/ARM/ARMISelLowering.cpp            |   8 +-
 lib/Target/CppBackend/CPPBackend.cpp          |   8 +-
 lib/Target/X86/X86ISelLowering.cpp            |   1 +
 lib/Transforms/IPO/MergeFunctions.cpp         |   5 +-
 .../Instrumentation/MemorySanitizer.cpp       |   2 +-
 .../Instrumentation/ThreadSanitizer.cpp       |  19 +----
 test/Assembler/atomic.ll                      |  10 ++-
 test/Bitcode/cmpxchg-upgrade.ll               |  23 +++++
 test/Bitcode/cmpxchg-upgrade.ll.bc            | Bin 0 -> 360 bytes
 test/Bitcode/memInstructions.3.2.ll           |  80 +++++++++---------
 test/CodeGen/AArch64/atomic-ops.ll            |   8 +-
 test/CodeGen/ARM/atomic-64bit.ll              |   2 +-
 test/CodeGen/ARM/atomic-cmp.ll                |   2 +-
 test/CodeGen/ARM/atomic-ops-v8.ll             |   8 +-
 test/CodeGen/Mips/atomic.ll                   |   6 +-
 test/CodeGen/Mips/atomicops.ll                |   2 +-
 test/CodeGen/PowerPC/Atomics-32.ll            |  32 +++----
 test/CodeGen/PowerPC/Atomics-64.ll            |  32 +++----
 test/CodeGen/PowerPC/atomic-1.ll              |   2 +-
 test/CodeGen/PowerPC/atomic-2.ll              |   2 +-
 test/CodeGen/SPARC/atomics.ll                 |   4 +-
 test/CodeGen/SystemZ/cmpxchg-01.ll            |   4 +-
 test/CodeGen/SystemZ/cmpxchg-02.ll            |   4 +-
 test/CodeGen/SystemZ/cmpxchg-03.ll            |  24 +++---
 test/CodeGen/SystemZ/cmpxchg-04.ll            |  18 ++--
 test/CodeGen/X86/2010-10-08-cmpxchg8b.ll      |   2 +-
 test/CodeGen/X86/Atomics-64.ll                |  40 ++++-----
 test/CodeGen/X86/atomic16.ll                  |   2 +-
 test/CodeGen/X86/atomic32.ll                  |   2 +-
 test/CodeGen/X86/atomic64.ll                  |   2 +-
 test/CodeGen/X86/atomic6432.ll                |   2 +-
 test/CodeGen/X86/atomic8.ll                   |   2 +-
 test/CodeGen/X86/atomic_op.ll                 |   6 +-
 test/CodeGen/X86/cmpxchg16b.ll                |   2 +-
 test/CodeGen/X86/coalescer-remat.ll           |   2 +-
 test/CodeGen/X86/nocx16.ll                    |   2 +-
 .../AddressSanitizer/test64.ll                |   2 +-
 .../MemorySanitizer/atomics.ll                |  10 +--
 .../Instrumentation/ThreadSanitizer/atomic.ll |  50 +++++------
 test/Transforms/LowerAtomic/atomic-swap.ll    |   2 +-
 .../SimplifyCFG/trapping-load-unreachable.ll  |   2 +-
 60 files changed, 489 insertions(+), 288 deletions(-)
 create mode 100644 test/Bitcode/cmpxchg-upgrade.ll
 create mode 100644 test/Bitcode/cmpxchg-upgrade.ll.bc

diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 760a064f69e..cf1243a04b1 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1496,7 +1496,7 @@ Atomic Memory Ordering Constraints
 Atomic instructions (:ref:`cmpxchg <i_cmpxchg>`,
 :ref:`atomicrmw <i_atomicrmw>`, :ref:`fence <i_fence>`,
 :ref:`atomic load <i_load>`, and :ref:`atomic store <i_store>`) take
-an ordering parameter that determines which other atomic instructions on
+ordering parameters that determine which other atomic instructions on
 the same address they *synchronize with*. These semantics are borrowed
 from Java and C++0x, but are somewhat more colloquial. If these
 descriptions aren't precise enough, check those specs (see spec
@@ -4990,7 +4990,7 @@ Syntax:
 
 ::
 
-      cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>  ; yields {ty}
+      cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <success ordering> <failure ordering> ; yields {ty}
 
 Overview:
 """""""""
@@ -5013,8 +5013,11 @@ type, and the type of '<pointer>' must be a pointer to that type. If the
 to modify the number or order of execution of this ``cmpxchg`` with
 other :ref:`volatile operations <volatile>`.
 
-The :ref:`ordering <ordering>` argument specifies how this ``cmpxchg``
-synchronizes with other atomic operations.
+The success and failure :ref:`ordering <ordering>` arguments specify how this
+``cmpxchg`` synchronizes with other atomic operations. The both ordering
+parameters must be at least ``monotonic``, the ordering constraint on failure
+must be no stronger than that on success, and the failure ordering cannot be
+either ``release`` or ``acq_rel``.
 
 The optional "``singlethread``" argument declares that the ``cmpxchg``
 is only atomic with respect to code (usually signal handlers) running in
@@ -5032,10 +5035,9 @@ operand is read and compared to '``<cmp>``'; if the read value is the
 equal, '``<new>``' is written. The original value at the location is
 returned.
 
-A successful ``cmpxchg`` is a read-modify-write instruction for the purpose
-of identifying release sequences. A failed ``cmpxchg`` is equivalent to an
-atomic load with an ordering parameter determined by dropping any
-``release`` part of the ``cmpxchg``'s ordering.
+A successful ``cmpxchg`` is a read-modify-write instruction for the purpose of
+identifying release sequences. A failed ``cmpxchg`` is equivalent to an atomic
+load with an ordering parameter determined the second ordering parameter.
 
 Example:
 """"""""
@@ -5049,7 +5051,7 @@ Example:
     loop:
       %cmp = phi i32 [ %orig, %entry ], [%old, %loop]
       %squared = mul i32 %cmp, %cmp
-      %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared          ; yields {i32}
+      %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared acq_rel monotonic ; yields {i32}
       %success = icmp eq i32 %cmp, %old
       br i1 %success, label %done, label %loop
 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index adba296e38c..a30656a1129 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -692,12 +692,14 @@ public:
   SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain,
                     SDValue Ptr, SDValue Cmp, SDValue Swp,
                     MachinePointerInfo PtrInfo, unsigned Alignment,
-                    AtomicOrdering Ordering,
+                    AtomicOrdering SuccessOrdering,
+                    AtomicOrdering FailureOrdering,
                     SynchronizationScope SynchScope);
   SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain,
                     SDValue Ptr, SDValue Cmp, SDValue Swp,
                     MachineMemOperand *MMO,
-                    AtomicOrdering Ordering,
+                    AtomicOrdering SuccessOrdering,
+                    AtomicOrdering FailureOrdering,
                     SynchronizationScope SynchScope);
 
   /// getAtomic - Gets a node for an atomic op, produces result (if relevant)
@@ -726,9 +728,13 @@ public:
   /// getAtomic - Gets a node for an atomic op, produces result and chain and
   /// takes N operands.
   SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList,
-                    SDValue* Ops, unsigned NumOps, MachineMemOperand *MMO,
-                    AtomicOrdering Ordering,
+                    SDValue *Ops, unsigned NumOps, MachineMemOperand *MMO,
+                    AtomicOrdering SuccessOrdering,
+                    AtomicOrdering FailureOrdering,
                     SynchronizationScope SynchScope);
+  SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList,
+                    SDValue *Ops, unsigned NumOps, MachineMemOperand *MMO,
+                    AtomicOrdering Ordering, SynchronizationScope SynchScope);
 
   /// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a
   /// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 74c291eba17..0b18d1d358c 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1093,15 +1093,27 @@ public:
 class AtomicSDNode : public MemSDNode {
   SDUse Ops[4];
 
-  void InitAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope) {
+  /// For cmpxchg instructions, the ordering requirements when a store does not
+  /// occur.
+  AtomicOrdering FailureOrdering;
+
+  void InitAtomic(AtomicOrdering SuccessOrdering,
+                  AtomicOrdering FailureOrdering,
+                  SynchronizationScope SynchScope) {
     // This must match encodeMemSDNodeFlags() in SelectionDAG.cpp.
-    assert((Ordering & 15) == Ordering &&
+    assert((SuccessOrdering & 15) == SuccessOrdering &&
+           "Ordering may not require more than 4 bits!");
+    assert((FailureOrdering & 15) == FailureOrdering &&
            "Ordering may not require more than 4 bits!");
     assert((SynchScope & 1) == SynchScope &&
            "SynchScope may not require more than 1 bit!");
-    SubclassData |= Ordering << 8;
+    SubclassData |= SuccessOrdering << 8;
     SubclassData |= SynchScope << 12;
-    assert(getOrdering() == Ordering && "Ordering encoding error!");
+    this->FailureOrdering = FailureOrdering;
+    assert(getSuccessOrdering() == SuccessOrdering &&
+           "Ordering encoding error!");
+    assert(getFailureOrdering() == FailureOrdering &&
+           "Ordering encoding error!");
     assert(getSynchScope() == SynchScope && "Synch-scope encoding error!");
   }
 
@@ -1115,12 +1127,11 @@ public:
   // SrcVal: address to update as a Value (used for MemOperand)
   // Align:  alignment of memory
   AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL,
-               EVT MemVT,
-               SDValue Chain, SDValue Ptr,
-               SDValue Cmp, SDValue Swp, MachineMemOperand *MMO,
-               AtomicOrdering Ordering, SynchronizationScope SynchScope)
-    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
-    InitAtomic(Ordering, SynchScope);
+               EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp,
+               MachineMemOperand *MMO, AtomicOrdering Ordering,
+               SynchronizationScope SynchScope)
+      : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    InitAtomic(Ordering, Ordering, SynchScope);
     InitOperands(Ops, Chain, Ptr, Cmp, Swp);
   }
   AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL,
@@ -1129,7 +1140,7 @@ public:
                SDValue Val, MachineMemOperand *MMO,
                AtomicOrdering Ordering, SynchronizationScope SynchScope)
     : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
-    InitAtomic(Ordering, SynchScope);
+    InitAtomic(Ordering, Ordering, SynchScope);
     InitOperands(Ops, Chain, Ptr, Val);
   }
   AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL,
@@ -1138,15 +1149,16 @@ public:
                MachineMemOperand *MMO,
                AtomicOrdering Ordering, SynchronizationScope SynchScope)
     : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
-    InitAtomic(Ordering, SynchScope);
+    InitAtomic(Ordering, Ordering, SynchScope);
     InitOperands(Ops, Chain, Ptr);
   }
   AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, EVT MemVT,
                SDValue* AllOps, SDUse *DynOps, unsigned NumOps,
                MachineMemOperand *MMO,
-               AtomicOrdering Ordering, SynchronizationScope SynchScope)
+               AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering,
+               SynchronizationScope SynchScope)
     : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
-    InitAtomic(Ordering, SynchScope);
+    InitAtomic(SuccessOrdering, FailureOrdering, SynchScope);
     assert((DynOps || NumOps <= array_lengthof(Ops)) &&
            "Too many ops for internal storage!");
     InitOperands(DynOps ? DynOps : Ops, AllOps, NumOps);
@@ -1155,6 +1167,16 @@ public:
   const SDValue &getBasePtr() const { return getOperand(1); }
   const SDValue &getVal() const { return getOperand(2); }
 
+  AtomicOrdering getSuccessOrdering() const {
+    return getOrdering();
+  }
+
+  // Not quite enough room in SubclassData for everything, so failure gets its
+  // own field.
+  AtomicOrdering getFailureOrdering() const {
+    return FailureOrdering;
+  }
+
   bool isCompareAndSwap() const {
     unsigned Op = getOpcode();
     return Op == ISD::ATOMIC_CMP_SWAP;
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index d28652f1251..79ee7b753db 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -934,10 +934,13 @@ public:
                          const Twine &Name = "") {
     return Insert(new FenceInst(Context, Ordering, SynchScope), Name);
   }
-  AtomicCmpXchgInst *CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New,
-                                         AtomicOrdering Ordering,
-                               SynchronizationScope SynchScope = CrossThread) {
-    return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope));
+  AtomicCmpXchgInst *
+  CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New,
+                      AtomicOrdering SuccessOrdering,
+                      AtomicOrdering FailureOrdering,
+                      SynchronizationScope SynchScope = CrossThread) {
+    return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering,
+                                        FailureOrdering, SynchScope));
   }
   AtomicRMWInst *CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val,
                                  AtomicOrdering Ordering,
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index e1a3b04720a..154dce32176 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -463,7 +463,8 @@ private:
 class AtomicCmpXchgInst : public Instruction {
   void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
   void Init(Value *Ptr, Value *Cmp, Value *NewVal,
-            AtomicOrdering Ordering, SynchronizationScope SynchScope);
+            AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering,
+            SynchronizationScope SynchScope);
 protected:
   AtomicCmpXchgInst *clone_impl() const override;
 public:
@@ -472,10 +473,14 @@ public:
     return User::operator new(s, 3);
   }
   AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
-                    AtomicOrdering Ordering, SynchronizationScope SynchScope,
+                    AtomicOrdering SuccessOrdering,
+                    AtomicOrdering FailureOrdering,
+                    SynchronizationScope SynchScope,
                     Instruction *InsertBefore = 0);
   AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
-                    AtomicOrdering Ordering, SynchronizationScope SynchScope,
+                    AtomicOrdering SuccessOrdering,
+                    AtomicOrdering FailureOrdering,
+                    SynchronizationScope SynchScope,
                     BasicBlock *InsertAtEnd);
 
   /// isVolatile - Return true if this is a cmpxchg from a volatile memory
@@ -496,13 +501,20 @@ public:
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 
   /// Set the ordering constraint on this cmpxchg.
-  void setOrdering(AtomicOrdering Ordering) {
+  void setSuccessOrdering(AtomicOrdering Ordering) {
     assert(Ordering != NotAtomic &&
            "CmpXchg instructions can only be atomic.");
-    setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~0x1c) |
                                (Ordering << 2));
   }
 
+  void setFailureOrdering(AtomicOrdering Ordering) {
+    assert(Ordering != NotAtomic &&
+           "CmpXchg instructions can only be atomic.");
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~0xe0) |
+                               (Ordering << 5));
+  }
+
   /// Specify whether this cmpxchg is atomic and orders other operations with
   /// respect to all concurrently executing threads, or only with respect to
   /// signal handlers executing in the same thread.
@@ -512,8 +524,13 @@ public:
   }
 
   /// Returns the ordering constraint on this cmpxchg.
-  AtomicOrdering getOrdering() const {
-    return AtomicOrdering(getSubclassDataFromInstruction() >> 2);
+  AtomicOrdering getSuccessOrdering() const {
+    return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7);
+  }
+
+  /// Returns the ordering constraint on this cmpxchg.
+  AtomicOrdering getFailureOrdering() const {
+    return AtomicOrdering((getSubclassDataFromInstruction() >> 5) & 7);
   }
 
   /// Returns whether this cmpxchg is atomic between threads or only within a
@@ -537,6 +554,28 @@ public:
     return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
+  /// \brief Returns the strongest permitted ordering on failure, given the
+  /// desired ordering on success.
+  ///
+  /// If the comparison in a cmpxchg operation fails, there is no atomic store
+  /// so release semantics cannot be provided. So this function drops explicit
+  /// Release requests from the AtomicOrdering. A SequentiallyConsistent
+  /// operation would remain SequentiallyConsistent.
+  static AtomicOrdering
+  getStrongestFailureOrdering(AtomicOrdering SuccessOrdering) {
+    switch (SuccessOrdering) {
+    default: llvm_unreachable("invalid cmpxchg success ordering");
+    case Release:
+    case Monotonic:
+      return Monotonic;
+    case AcquireRelease:
+    case Acquire:
+      return Acquire;
+    case SequentiallyConsistent:
+      return SequentiallyConsistent;
+    }
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::AtomicCmpXchg;
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 36ed40d3549..9583bbe5e34 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -338,7 +338,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
   // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
-  if (CX->getOrdering() > Monotonic)
+  if (CX->getSuccessOrdering() > Monotonic)
     return ModRef;
 
   // If the cmpxchg address does not alias the location, it does not access it.
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index f29ceddf643..f75d3c944a5 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1518,6 +1518,15 @@ bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
   Scope = CrossThread;
   if (EatIfPresent(lltok::kw_singlethread))
     Scope = SingleThread;
+
+  return ParseOrdering(Ordering);
+}
+
+/// ParseOrdering
+///   ::= AtomicOrdering
+///
+/// This sets Ordering to the parsed value.
+bool LLParser::ParseOrdering(AtomicOrdering &Ordering) {
   switch (Lex.getKind()) {
   default: return TokError("Expected ordering on atomic instruction");
   case lltok::kw_unordered: Ordering = Unordered; break;
@@ -4193,11 +4202,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
 
 /// ParseCmpXchg
 ///   ::= 'cmpxchg' 'volatile'? TypeAndValue ',' TypeAndValue ',' TypeAndValue
-///       'singlethread'? AtomicOrdering
+///       'singlethread'? AtomicOrdering AtomicOrdering
 int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
   Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc;
   bool AteExtraComma = false;
-  AtomicOrdering Ordering = NotAtomic;
+  AtomicOrdering SuccessOrdering = NotAtomic;
+  AtomicOrdering FailureOrdering = NotAtomic;
   SynchronizationScope Scope = CrossThread;
   bool isVolatile = false;
 
@@ -4209,11 +4219,16 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
       ParseTypeAndValue(Cmp, CmpLoc, PFS) ||
       ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") ||
       ParseTypeAndValue(New, NewLoc, PFS) ||
-      ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering))
+      ParseScopeAndOrdering(true /*Always atomic*/, Scope, SuccessOrdering) ||
+      ParseOrdering(FailureOrdering))
     return true;
 
-  if (Ordering == Unordered)
+  if (SuccessOrdering == Unordered || FailureOrdering == Unordered)
     return TokError("cmpxchg cannot be unordered");
+  if (SuccessOrdering < FailureOrdering)
+    return TokError("cmpxchg must be at least as ordered on success as failure");
+  if (FailureOrdering == Release || FailureOrdering == AcquireRelease)
+    return TokError("cmpxchg failure ordering cannot include release semantics");
   if (!Ptr->getType()->isPointerTy())
     return Error(PtrLoc, "cmpxchg operand must be a pointer");
   if (cast<PointerType>(Ptr->getType())->getElementType() != Cmp->getType())
@@ -4227,8 +4242,8 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
     return Error(NewLoc, "cmpxchg operand must be power-of-two byte-sized"
                          " integer");
 
-  AtomicCmpXchgInst *CXI =
-    new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, Scope);
+  AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering,
+                                                 FailureOrdering, Scope);
   CXI->setVolatile(isVolatile);
   Inst = CXI;
   return AteExtraComma ? InstExtraComma : InstNormal;
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 790ffd2c710..294a1e14360 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -209,6 +209,7 @@ namespace llvm {
     bool ParseOptionalAlignment(unsigned &Alignment);
     bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
                                AtomicOrdering &Ordering);
+    bool ParseOrdering(AtomicOrdering &Ordering);
     bool ParseOptionalStackAlignment(unsigned &Alignment);
     bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
     bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 5e358d9cc54..eb716660eb4 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2882,7 +2882,8 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
       break;
     }
     case bitc::FUNC_CODE_INST_CMPXCHG: {
-      // CMPXCHG:[ptrty, ptr, cmp, new, vol, ordering, synchscope]
+      // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope,
+      //          failureordering]
       unsigned OpNum = 0;
       Value *Ptr, *Cmp, *New;
       if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -2890,13 +2891,22 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
                     cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
           popValue(Record, OpNum, NextValueNo,
                     cast<PointerType>(Ptr->getType())->getElementType(), New) ||
-          OpNum+3 != Record.size())
+          (OpNum + 3 != Record.size() && OpNum + 4 != Record.size()))
         return Error(InvalidRecord);
-      AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]);
-      if (Ordering == NotAtomic || Ordering == Unordered)
+      AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]);
+      if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered)
         return Error(InvalidRecord);
       SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
-      I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope);
+
+      AtomicOrdering FailureOrdering;
+      if (Record.size() < 7)
+        FailureOrdering =
+            AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering);
+      else
+        FailureOrdering = GetDecodedOrdering(Record[OpNum+3]);
+
+      I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering,
+                                SynchScope);
       cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
       InstructionList.push_back(I);
       break;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8a09507c29c..d390eedd362 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1441,9 +1441,11 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     pushValue(I.getOperand(2), InstID, Vals, VE);         // newval.
     Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
     Vals.push_back(GetEncodedOrdering(
-                     cast<AtomicCmpXchgInst>(I).getOrdering()));
+                     cast<AtomicCmpXchgInst>(I).getSuccessOrdering()));
     Vals.push_back(GetEncodedSynchScope(
                      cast<AtomicCmpXchgInst>(I).getSynchScope()));
+    Vals.push_back(GetEncodedOrdering(
+                     cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
     break;
   case Instruction::AtomicRMW:
     Code = bitc::FUNC_CODE_INST_ATOMICRMW;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2dcfbb5b27d..6297774484f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2878,6 +2878,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                                  Node->getOperand(1), Zero, Zero,
                                  cast<AtomicSDNode>(Node)->getMemOperand(),
                                  cast<AtomicSDNode>(Node)->getOrdering(),
+                                 cast<AtomicSDNode>(Node)->getOrdering(),
                                  cast<AtomicSDNode>(Node)->getSynchScope());
     Results.push_back(Swap.getValue(0));
     Results.push_back(Swap.getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index cd7c4963759..18b2376b8b0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -193,10 +193,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
   SDValue Op2 = GetPromotedInteger(N->getOperand(2));
   SDValue Op3 = GetPromotedInteger(N->getOperand(3));
-  SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
-                              N->getMemoryVT(), N->getChain(), N->getBasePtr(),
-                              Op2, Op3, N->getMemOperand(), N->getOrdering(),
-                              N->getSynchScope());
+  SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+                              N->getChain(), N->getBasePtr(), Op2, Op3,
+                              N->getMemOperand(), N->getSuccessOrdering(),
+                              N->getFailureOrdering(), N->getSynchScope());
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2448,6 +2448,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
                                N->getOperand(1), Zero, Zero,
                                cast<AtomicSDNode>(N)->getMemOperand(),
                                cast<AtomicSDNode>(N)->getOrdering(),
+                               cast<AtomicSDNode>(N)->getOrdering(),
                                cast<AtomicSDNode>(N)->getSynchScope());
   ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
   ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9a9062af9d7..43a02fe9c7e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4223,9 +4223,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDVTList VTList, SDValue* Ops, unsigned NumOps,
+                                SDVTList VTList, SDValue *Ops, unsigned NumOps,
                                 MachineMemOperand *MMO,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   FoldingSetNodeID ID;
   ID.AddInteger(MemVT.getRawBits());
@@ -4247,17 +4248,28 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
   SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
                                                dl.getDebugLoc(), VTList, MemVT,
                                                Ops, DynOps, NumOps, MMO,
-                                               Ordering, SynchScope);
+                                               SuccessOrdering, FailureOrdering,
+                                               SynchScope);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
+SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+                                SDVTList VTList, SDValue *Ops, unsigned NumOps,
+                                MachineMemOperand *MMO,
+                                AtomicOrdering Ordering,
+                                SynchronizationScope SynchScope) {
+  return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering,
+                   Ordering, SynchScope);
+}
+
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
                                 SDValue Chain, SDValue Ptr, SDValue Cmp,
                                 SDValue Swp, MachinePointerInfo PtrInfo,
                                 unsigned Alignment,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);
@@ -4278,14 +4290,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
     MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
 
   return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
-                   Ordering, SynchScope);
+                   SuccessOrdering, FailureOrdering, SynchScope);
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
                                 SDValue Chain,
                                 SDValue Ptr, SDValue Cmp,
                                 SDValue Swp, MachineMemOperand *MMO,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
   assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -4294,7 +4307,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
 
   SDVTList VTs = getVTList(VT, MVT::Other);
   SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
-  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
+  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering,
+                   FailureOrdering, SynchScope);
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 13daba08eea..b60e7803b79 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3605,14 +3605,15 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
 
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
-  AtomicOrdering Order = I.getOrdering();
+  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
+  AtomicOrdering FailureOrder = I.getFailureOrdering();
   SynchronizationScope Scope = I.getSynchScope();
 
   SDValue InChain = getRoot();
 
   const TargetLowering *TLI = TM.getTargetLowering();
   if (TLI->getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+    InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
                                    DAG, *TLI);
 
   SDValue L =
@@ -3623,13 +3624,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
                   getValue(I.getCompareOperand()),
                   getValue(I.getNewValOperand()),
                   MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
-                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
                   Scope);
 
   SDValue OutChain = L.getValue(1);
 
   if (TLI->getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+    OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
                                     DAG, *TLI);
 
   setValue(&I, L);
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index c9ea49b2224..a528e5f326b 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1226,6 +1226,37 @@ void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
   }
 }
 
+void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
+                                        AtomicOrdering FailureOrdering,
+                                        SynchronizationScope SynchScope) {
+  assert(SuccessOrdering != NotAtomic && FailureOrdering != NotAtomic);
+
+  switch (SynchScope) {
+  case SingleThread: Out << " singlethread"; break;
+  case CrossThread: break;
+  }
+
+  switch (SuccessOrdering) {
+  default: Out << " <bad ordering " << int(SuccessOrdering) << ">"; break;
+  case Unordered: Out << " unordered"; break;
+  case Monotonic: Out << " monotonic"; break;
+  case Acquire: Out << " acquire"; break;
+  case Release: Out << " release"; break;
+  case AcquireRelease: Out << " acq_rel"; break;
+  case SequentiallyConsistent: Out << " seq_cst"; break;
+  }
+
+  switch (FailureOrdering) {
+  default: Out << " <bad ordering " << int(FailureOrdering) << ">"; break;
+  case Unordered: Out << " unordered"; break;
+  case Monotonic: Out << " monotonic"; break;
+  case Acquire: Out << " acquire"; break;
+  case Release: Out << " release"; break;
+  case AcquireRelease: Out << " acq_rel"; break;
+  case SequentiallyConsistent: Out << " seq_cst"; break;
+  }
+}
+
 void AssemblyWriter::writeParamOperand(const Value *Operand,
                                        AttributeSet Attrs, unsigned Idx) {
   if (Operand == 0) {
@@ -2018,7 +2049,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     if (SI->getAlignment())
       Out << ", align " << SI->getAlignment();
   } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
-    writeAtomic(CXI->getOrdering(), CXI->getSynchScope());
+    writeAtomicCmpXchg(CXI->getSuccessOrdering(), CXI->getFailureOrdering(),
+                       CXI->getSynchScope());
   } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
     writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope());
   } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
diff --git a/lib/IR/AsmWriter.h b/lib/IR/AsmWriter.h
index 222d3a490fd..b4ce6de10dd 100644
--- a/lib/IR/AsmWriter.h
+++ b/lib/IR/AsmWriter.h
@@ -90,6 +90,9 @@ public:
   void writeOperand(const Value *Op, bool PrintType);
   void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
   void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+  void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
+                          AtomicOrdering FailureOrdering,
+                          SynchronizationScope SynchScope);
 
   void writeAllMDNodes();
   void writeMDNode(unsigned Slot, const MDNode *Node);
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index bd7a62e83d9..d31a92e0317 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -313,7 +313,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
            FI->getSynchScope() == cast<FenceInst>(FI)->getSynchScope();
   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
     return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
-           CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+           CXI->getSuccessOrdering() ==
+               cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() &&
+           CXI->getFailureOrdering() ==
+               cast<AtomicCmpXchgInst>(I)->getFailureOrdering() &&
            CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
     return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
@@ -384,7 +387,10 @@ bool Instruction::isSameOperationAs(const Instruction *I,
            FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
     return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
-           CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+           CXI->getSuccessOrdering() ==
+               cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() &&
+           CXI->getFailureOrdering() ==
+               cast<AtomicCmpXchgInst>(I)->getFailureOrdering() &&
            CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
     return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index d874411ccd3..3aa8413541c 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1216,12 +1216,14 @@ void StoreInst::setAlignment(unsigned Align) {
 //===----------------------------------------------------------------------===//
 
 void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
-                             AtomicOrdering Ordering,
+                             AtomicOrdering SuccessOrdering,
+                             AtomicOrdering FailureOrdering,
                              SynchronizationScope SynchScope) {
   Op<0>() = Ptr;
   Op<1>() = Cmp;
   Op<2>() = NewVal;
-  setOrdering(Ordering);
+  setSuccessOrdering(SuccessOrdering);
+  setFailureOrdering(FailureOrdering);
   setSynchScope(SynchScope);
 
   assert(getOperand(0) && getOperand(1) && getOperand(2) &&
@@ -1234,30 +1236,38 @@ void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
   assert(getOperand(2)->getType() ==
                  cast<PointerType>(getOperand(0)->getType())->getElementType()
          && "Ptr must be a pointer to NewVal type!");
-  assert(Ordering != NotAtomic &&
+  assert(SuccessOrdering != NotAtomic &&
+         "AtomicCmpXchg instructions must be atomic!");
+  assert(FailureOrdering != NotAtomic &&
          "AtomicCmpXchg instructions must be atomic!");
+  assert(SuccessOrdering >= FailureOrdering &&
+         "AtomicCmpXchg success ordering must be at least as strong as fail");
+  assert(FailureOrdering != Release && FailureOrdering != AcquireRelease &&
+         "AtomicCmpXchg failure ordering cannot include release semantics");
 }
 
 AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
-                                     AtomicOrdering Ordering,
+                                     AtomicOrdering SuccessOrdering,
+                                     AtomicOrdering FailureOrdering,
                                      SynchronizationScope SynchScope,
                                      Instruction *InsertBefore)
   : Instruction(Cmp->getType(), AtomicCmpXchg,
                 OperandTraits<AtomicCmpXchgInst>::op_begin(this),
                 OperandTraits<AtomicCmpXchgInst>::operands(this),
                 InsertBefore) {
-  Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+  Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope);
 }
 
 AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
-                                     AtomicOrdering Ordering,
+                                     AtomicOrdering SuccessOrdering,
+                                     AtomicOrdering FailureOrdering,
                                      SynchronizationScope SynchScope,
                                      BasicBlock *InsertAtEnd)
   : Instruction(Cmp->getType(), AtomicCmpXchg,
                 OperandTraits<AtomicCmpXchgInst>::op_begin(this),
                 OperandTraits<AtomicCmpXchgInst>::operands(this),
                 InsertAtEnd) {
-  Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+  Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope);
 }
  
 //===----------------------------------------------------------------------===//
@@ -3596,7 +3606,8 @@ StoreInst *StoreInst::clone_impl() const {
 AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
   AtomicCmpXchgInst *Result =
     new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
-                          getOrdering(), getSynchScope());
+                          getSuccessOrdering(), getFailureOrdering(),
+                          getSynchScope());
   Result->setVolatile(isVolatile());
   return Result;
 }
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 158601226ab..4bdc1c13d03 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1829,10 +1829,23 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
 }
 
 void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
-  Assert1(CXI.getOrdering() != NotAtomic,
+
+  // FIXME: more conditions???
+  Assert1(CXI.getSuccessOrdering() != NotAtomic,
+          "cmpxchg instructions must be atomic.", &CXI);
+  Assert1(CXI.getFailureOrdering() != NotAtomic,
           "cmpxchg instructions must be atomic.", &CXI);
-  Assert1(CXI.getOrdering() != Unordered,
+  Assert1(CXI.getSuccessOrdering() != Unordered,
           "cmpxchg instructions cannot be unordered.", &CXI);
+  Assert1(CXI.getFailureOrdering() != Unordered,
+          "cmpxchg instructions cannot be unordered.", &CXI);
+  Assert1(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
+          "cmpxchg instructions be at least as constrained on success as fail",
+          &CXI);
+  Assert1(CXI.getFailureOrdering() != Release &&
+              CXI.getFailureOrdering() != AcquireRelease,
+          "cmpxchg failure ordering cannot include release semantics", &CXI);
+
   PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
   Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
   Type *ElTy = PTy->getElementType();
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 691961ef2af..aa7ca7f91ff 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -6054,10 +6054,10 @@ ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
                               Node->getOperand(i), DAG.getIntPtrConstant(1)));
   }
   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
-  SDValue Result =
-    DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
-                  cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
-                  AN->getSynchScope());
+  SDValue Result = DAG.getAtomic(
+      Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+      cast<MemSDNode>(Node)->getMemOperand(), AN->getSuccessOrdering(),
+      AN->getFailureOrdering(), AN->getSynchScope());
   SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
   Results.push_back(Result.getValue(2));
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index b13709914bf..31585d9296e 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1567,12 +1567,16 @@ void CppWriter::printInstruction(const Instruction *I,
   }
   case Instruction::AtomicCmpXchg: {
     const AtomicCmpXchgInst *cxi = cast<AtomicCmpXchgInst>(I);
-    StringRef Ordering = ConvertAtomicOrdering(cxi->getOrdering());
+    StringRef SuccessOrdering =
+        ConvertAtomicOrdering(cxi->getSuccessOrdering());
+    StringRef FailureOrdering =
+        ConvertAtomicOrdering(cxi->getFailureOrdering());
     StringRef CrossThread = ConvertAtomicSynchScope(cxi->getSynchScope());
     Out << "AtomicCmpXchgInst* " << iName
         << " = new AtomicCmpXchgInst("
         << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", "
-        << Ordering << ", " << CrossThread << ", " << bbname
+        << SuccessOrdering << ", " << FailureOrdering << ", "
+        << CrossThread << ", " << bbname
         << ");";
     nl(Out) << iName << "->setName(\"";
     printEscapedString(cxi->getName());
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 07c83e8b294..f651205bf3d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13805,6 +13805,7 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
                                Node->getOperand(1), Zero, Zero,
                                cast<AtomicSDNode>(Node)->getMemOperand(),
                                cast<AtomicSDNode>(Node)->getOrdering(),
+                               cast<AtomicSDNode>(Node)->getOrdering(),
                                cast<AtomicSDNode>(Node)->getSynchScope());
   Results.push_back(Swap.getValue(0));
   Results.push_back(Swap.getValue(1));
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 64f63773f6c..ff16b1ecdf8 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -341,7 +341,10 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
            FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
     return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
-           CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() &&
+           CXI->getSuccessOrdering() ==
+               cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
+           CXI->getFailureOrdering() ==
+               cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
            CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
     return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b4ae443475b..b158f1f10af 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1088,7 +1088,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
     handleCASOrRMW(I);
-    I.setOrdering(addReleaseOrdering(I.getOrdering()));
+    I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
   }
 
   // Vector manipulation.
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index fed7508dbc9..5ffb17cbf3b 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -446,21 +446,6 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
   return IRB->getInt32(v);
 }
 
-static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
-  uint32_t v = 0;
-  switch (ord) {
-    case NotAtomic:              assert(false);
-    case Unordered:              // Fall-through.
-    case Monotonic:              v = 0; break;
-    // case Consume:                v = 1; break;  // Not specified yet.
-    case Acquire:                v = 2; break;
-    case Release:                v = 0; break;
-    case AcquireRelease:         v = 2; break;
-    case SequentiallyConsistent: v = 5; break;
-  }
-  return IRB->getInt32(v);
-}
-
 // If a memset intrinsic gets inlined by the code gen, we will miss races on it.
 // So, we either need to ensure the intrinsic is not inlined, or instrument it.
 // We do not instrument memset/memmove/memcpy intrinsics (too complicated),
@@ -556,8 +541,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
     Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
                      IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
                      IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
-                     createOrdering(&IRB, CASI->getOrdering()),
-                     createFailOrdering(&IRB, CASI->getOrdering())};
+                     createOrdering(&IRB, CASI->getSuccessOrdering()),
+                     createOrdering(&IRB, CASI->getFailureOrdering())};
     CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
     ReplaceInstWithInst(I, C);
   } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
diff --git a/test/Assembler/atomic.ll b/test/Assembler/atomic.ll
index b245cdea75b..a2ae58e296e 100644
--- a/test/Assembler/atomic.ll
+++ b/test/Assembler/atomic.ll
@@ -10,10 +10,12 @@ define void @f(i32* %x) {
   store atomic i32 3, i32* %x release, align 4
   ; CHECK: store atomic volatile i32 3, i32* %x singlethread monotonic, align 4
   store atomic volatile i32 3, i32* %x singlethread monotonic, align 4
-  ; CHECK: cmpxchg i32* %x, i32 1, i32 0 singlethread monotonic
-  cmpxchg i32* %x, i32 1, i32 0 singlethread monotonic
-  ; CHECK: cmpxchg volatile i32* %x, i32 0, i32 1 acq_rel
-  cmpxchg volatile i32* %x, i32 0, i32 1 acq_rel
+  ; CHECK: cmpxchg i32* %x, i32 1, i32 0 singlethread monotonic monotonic
+  cmpxchg i32* %x, i32 1, i32 0 singlethread monotonic monotonic
+  ; CHECK: cmpxchg volatile i32* %x, i32 0, i32 1 acq_rel acquire
+  cmpxchg volatile i32* %x, i32 0, i32 1 acq_rel acquire
+  ; CHECK: cmpxchg i32* %x, i32 42, i32 0 acq_rel monotonic
+  cmpxchg i32* %x, i32 42, i32 0 acq_rel monotonic
   ; CHECK: atomicrmw add i32* %x, i32 10 seq_cst
   atomicrmw add i32* %x, i32 10 seq_cst
   ; CHECK: atomicrmw volatile xchg  i32* %x, i32 10 monotonic
diff --git a/test/Bitcode/cmpxchg-upgrade.ll b/test/Bitcode/cmpxchg-upgrade.ll
new file mode 100644
index 00000000000..d36ac1c1790
--- /dev/null
+++ b/test/Bitcode/cmpxchg-upgrade.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; cmpxchg-upgrade.ll.bc was produced by running a version of llvm-as from just
+; before the IR change on this file.
+
+define void @test(i32* %addr) {
+   cmpxchg i32* %addr, i32 42, i32 0 monotonic
+; CHECK: cmpxchg i32* %addr, i32 42, i32 0 monotonic monotonic
+
+   cmpxchg i32* %addr, i32 42, i32 0 acquire
+; CHECK: cmpxchg i32* %addr, i32 42, i32 0 acquire acquire
+
+   cmpxchg i32* %addr, i32 42, i32 0 release
+; CHECK: cmpxchg i32* %addr, i32 42, i32 0 release monotonic
+
+   cmpxchg i32* %addr, i32 42, i32 0 acq_rel
+; CHECK: cmpxchg i32* %addr, i32 42, i32 0 acq_rel acquire
+
+   cmpxchg i32* %addr, i32 42, i32 0 seq_cst
+; CHECK: cmpxchg i32* %addr, i32 42, i32 0 seq_cst seq_cst
+
+   ret void
+}
\ No newline at end of file
diff --git a/test/Bitcode/cmpxchg-upgrade.ll.bc b/test/Bitcode/cmpxchg-upgrade.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..922f2eb84edfa411cb81d623caf4c1eaf0693165
GIT binary patch
literal 360
zcmZ>AK5$Qwhk+rSfq{X$Nr8b0NDBcmd!zD1#}h1`Yyw7>lNeigR9QJB<yg9t8hBip
zoF;KQr3e^_Sa3*qav8a(cyLWnR6Y{az$2+xq{4pUgh0}Y1uGnmeP9Hd)2YC~zy+j@
zlumFcq_nb3R$S5oBp$acoFF3P)7Hb?lHwyEqRddtlHj1p<ihMM0dfHdG}r*0#UKvE
z#vBbYM;^+gi7?uBI9s$F?r~~hudHA%o>9p2jDi2H0N<AeK4TsyxkCqK&n=WmGhhWP
zYnDF4Y<t4lwujmFOhr#g0edwEd!<KvK>>SJ0drYJdw~Kw(Ck752C$#m9DG_#fDTRs
z@`Zuo!YqzD0zew%nkkE<Kwe=8W;>?9(jed?DRD%B!Py`+LF5R7z`-evi6SfkEI`Rb
Xkp{s83jwemF$adj0$^ProeT^Bj%-um

literal 0
HcmV?d00001

diff --git a/test/Bitcode/memInstructions.3.2.ll b/test/Bitcode/memInstructions.3.2.ll
index 868e4b5f4d1..21c3deb8a5a 100644
--- a/test/Bitcode/memInstructions.3.2.ll
+++ b/test/Bitcode/memInstructions.3.2.ll
@@ -223,69 +223,69 @@ define void @cmpxchg(i32* %ptr,i32 %cmp,i32 %new){
 entry:
   ;cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>
 
-; CHECK: %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic
-  %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic
+; CHECK: %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+  %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
   
-; CHECK-NEXT: %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic
-  %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic
+; CHECK-NEXT: %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+  %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
   
-; CHECK-NEXT: %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic
-  %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic
+; CHECK-NEXT: %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+  %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
   
-; CHECK-NEXT: %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic
-  %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic
+; CHECK-NEXT: %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+  %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
   
   
-; CHECK-NEXT: %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire
-  %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire
+; CHECK-NEXT: %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
+  %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
   
-; CHECK-NEXT: %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire
-  %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire
+; CHECK-NEXT: %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
+  %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
   
-; CHECK-NEXT: %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire
-  %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire
+; CHECK-NEXT: %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+  %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
   
-; CHECK-NEXT: %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire
-  %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire
+; CHECK-NEXT: %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+  %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
   
   
-; CHECK-NEXT: %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release
-  %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release
+; CHECK-NEXT: %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
+  %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
   
-; CHECK-NEXT: %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release
-  %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release
+; CHECK-NEXT: %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
+  %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
   
-; CHECK-NEXT: %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release
-  %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release
+; CHECK-NEXT: %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+  %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
   
-; CHECK-NEXT: %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release
-  %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release
+; CHECK-NEXT: %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+  %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
   
   
-; CHECK-NEXT: %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel
-  %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel
+; CHECK-NEXT: %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+  %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
   
-; CHECK-NEXT: %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel
-  %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel
+; CHECK-NEXT: %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+  %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
   
-; CHECK-NEXT: %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel
-  %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel
+; CHECK-NEXT: %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+  %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
   
-; CHECK-NEXT: %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel
-  %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel
+; CHECK-NEXT: %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+  %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
   
   
-; CHECK-NEXT: %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst
-  %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst
+; CHECK-NEXT: %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+  %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
   
-; CHECK-NEXT: %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst
-  %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst
+; CHECK-NEXT: %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+  %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
   
-; CHECK-NEXT: %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst
-  %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst
+; CHECK-NEXT: %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+  %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
   
-; CHECK-NEXT: %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst
-  %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst
+; CHECK-NEXT: %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+  %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
 
   ret void
 }
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 5857faf80a1..5fe29364ee3 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -897,7 +897,7 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
@@ -920,7 +920,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
-   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
@@ -943,7 +943,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
@@ -966,7 +966,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 0477d4f4016..7cf45ebde76 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -171,7 +171,7 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
-  %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
+  %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
   ret i64 %r
 }
 
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
index 51ada693d0b..a4738077b1a 100644
--- a/test/CodeGen/ARM/atomic-cmp.ll
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -10,6 +10,6 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
 ; T2-LABEL: t:
 ; T2: ldrexb
 ; T2: strexb
-  %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic
+  %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
   ret i8 %tmp0
 }
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 3f93929fd19..87e76a48c5e 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -987,7 +987,7 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1013,7 +1013,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
-   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
@@ -1039,7 +1039,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
@@ -1065,7 +1065,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index 0e60fe1fbfb..77d7bf31545 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -77,7 +77,7 @@ entry:
   %newval.addr = alloca i32, align 4
   store i32 %newval, i32* %newval.addr, align 4
   %tmp = load i32* %newval.addr, align 4
-  %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic
+  %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
   ret i32 %0
 
 ; CHECK-EL-LABEL:   AtomicCmpSwap32:
@@ -333,7 +333,7 @@ entry:
 
 define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
 entry:
-  %0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic
+  %0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
   ret i8 %0
 
 ; CHECK-EL-LABEL:   AtomicCmpSwap8:
@@ -429,7 +429,7 @@ entry:
 
 define i32 @zeroreg() nounwind {
 entry:
-  %0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst
+  %0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst
   %1 = icmp eq i32 %0, 1
   %conv = zext i1 %1 to i32
   ret i32 %conv
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index 0f0f01afc14..dc07c637418 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -20,7 +20,7 @@ entry:
   %add.i = add nsw i32 %0, 2
   %1 = load volatile i32* %x, align 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
-  %2 = cmpxchg i32* %x, i32 1, i32 2 seq_cst
+  %2 = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
   %3 = load volatile i32* %x, align 4
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
   %4 = atomicrmw xchg i32* %x, i32 1 seq_cst
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
index 64f149541be..b5c03e2b202 100644
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ b/test/CodeGen/PowerPC/Atomics-32.ll
@@ -529,63 +529,63 @@ define void @test_compare_and_swap() nounwind {
 entry:
   %0 = load i8* @uc, align 1
   %1 = load i8* @sc, align 1
-  %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic
+  %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
   store i8 %2, i8* @sc, align 1
   %3 = load i8* @uc, align 1
   %4 = load i8* @sc, align 1
-  %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic
+  %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
   store i8 %5, i8* @uc, align 1
   %6 = load i8* @uc, align 1
   %7 = zext i8 %6 to i16
   %8 = load i8* @sc, align 1
   %9 = sext i8 %8 to i16
   %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic
+  %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
   store i16 %11, i16* @ss, align 2
   %12 = load i8* @uc, align 1
   %13 = zext i8 %12 to i16
   %14 = load i8* @sc, align 1
   %15 = sext i8 %14 to i16
   %16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic
+  %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
   store i16 %17, i16* @us, align 2
   %18 = load i8* @uc, align 1
   %19 = zext i8 %18 to i32
   %20 = load i8* @sc, align 1
   %21 = sext i8 %20 to i32
   %22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic
+  %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
   store i32 %23, i32* @si, align 4
   %24 = load i8* @uc, align 1
   %25 = zext i8 %24 to i32
   %26 = load i8* @sc, align 1
   %27 = sext i8 %26 to i32
   %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic
+  %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
   store i32 %29, i32* @ui, align 4
   %30 = load i8* @uc, align 1
   %31 = zext i8 %30 to i32
   %32 = load i8* @sc, align 1
   %33 = sext i8 %32 to i32
   %34 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic
+  %35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic monotonic
   store i32 %35, i32* @sl, align 4
   %36 = load i8* @uc, align 1
   %37 = zext i8 %36 to i32
   %38 = load i8* @sc, align 1
   %39 = sext i8 %38 to i32
   %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic
+  %41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic monotonic
   store i32 %41, i32* @ul, align 4
   %42 = load i8* @uc, align 1
   %43 = load i8* @sc, align 1
-  %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic
+  %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
   %45 = icmp eq i8 %44, %42
   %46 = zext i1 %45 to i32
   store i32 %46, i32* @ui, align 4
   %47 = load i8* @uc, align 1
   %48 = load i8* @sc, align 1
-  %49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic
+  %49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic monotonic
   %50 = icmp eq i8 %49, %47
   %51 = zext i1 %50 to i32
   store i32 %51, i32* @ui, align 4
@@ -594,7 +594,7 @@ entry:
   %54 = load i8* @sc, align 1
   %55 = sext i8 %54 to i16
   %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic
+  %57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic monotonic
   %58 = icmp eq i16 %57, %53
   %59 = zext i1 %58 to i32
   store i32 %59, i32* @ui, align 4
@@ -603,7 +603,7 @@ entry:
   %62 = load i8* @sc, align 1
   %63 = sext i8 %62 to i16
   %64 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic
+  %65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic monotonic
   %66 = icmp eq i16 %65, %61
   %67 = zext i1 %66 to i32
   store i32 %67, i32* @ui, align 4
@@ -612,7 +612,7 @@ entry:
   %70 = load i8* @sc, align 1
   %71 = sext i8 %70 to i32
   %72 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic
+  %73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic monotonic
   %74 = icmp eq i32 %73, %69
   %75 = zext i1 %74 to i32
   store i32 %75, i32* @ui, align 4
@@ -621,7 +621,7 @@ entry:
   %78 = load i8* @sc, align 1
   %79 = sext i8 %78 to i32
   %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic
+  %81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic monotonic
   %82 = icmp eq i32 %81, %77
   %83 = zext i1 %82 to i32
   store i32 %83, i32* @ui, align 4
@@ -630,7 +630,7 @@ entry:
   %86 = load i8* @sc, align 1
   %87 = sext i8 %86 to i32
   %88 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic
+  %89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic monotonic
   %90 = icmp eq i32 %89, %85
   %91 = zext i1 %90 to i32
   store i32 %91, i32* @ui, align 4
@@ -639,7 +639,7 @@ entry:
   %94 = load i8* @sc, align 1
   %95 = sext i8 %94 to i32
   %96 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic
+  %97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic monotonic
   %98 = icmp eq i32 %97, %93
   %99 = zext i1 %98 to i32
   store i32 %99, i32* @ui, align 4
diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll
index d35b8487470..122b54e080a 100644
--- a/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/test/CodeGen/PowerPC/Atomics-64.ll
@@ -536,64 +536,64 @@ define void @test_compare_and_swap() nounwind {
 entry:
   %0 = load i8* @uc, align 1
   %1 = load i8* @sc, align 1
-  %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic
+  %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
   store i8 %2, i8* @sc, align 1
   %3 = load i8* @uc, align 1
   %4 = load i8* @sc, align 1
-  %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic
+  %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
   store i8 %5, i8* @uc, align 1
   %6 = load i8* @uc, align 1
   %7 = zext i8 %6 to i16
   %8 = load i8* @sc, align 1
   %9 = sext i8 %8 to i16
   %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic
+  %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
   store i16 %11, i16* @ss, align 2
   %12 = load i8* @uc, align 1
   %13 = zext i8 %12 to i16
   %14 = load i8* @sc, align 1
   %15 = sext i8 %14 to i16
   %16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic
+  %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
   store i16 %17, i16* @us, align 2
   %18 = load i8* @uc, align 1
   %19 = zext i8 %18 to i32
   %20 = load i8* @sc, align 1
   %21 = sext i8 %20 to i32
   %22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic
+  %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
   store i32 %23, i32* @si, align 4
   %24 = load i8* @uc, align 1
   %25 = zext i8 %24 to i32
   %26 = load i8* @sc, align 1
   %27 = sext i8 %26 to i32
   %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic
+  %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
   store i32 %29, i32* @ui, align 4
   %30 = load i8* @uc, align 1
   %31 = zext i8 %30 to i64
   %32 = load i8* @sc, align 1
   %33 = sext i8 %32 to i64
   %34 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
-  %35 = cmpxchg i64* %34, i64 %31, i64 %33 monotonic
+  %35 = cmpxchg i64* %34, i64 %31, i64 %33 monotonic monotonic
   store i64 %35, i64* @sl, align 8
   %36 = load i8* @uc, align 1
   %37 = zext i8 %36 to i64
   %38 = load i8* @sc, align 1
   %39 = sext i8 %38 to i64
   %40 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
-  %41 = cmpxchg i64* %40, i64 %37, i64 %39 monotonic
+  %41 = cmpxchg i64* %40, i64 %37, i64 %39 monotonic monotonic
   store i64 %41, i64* @ul, align 8
   %42 = load i8* @uc, align 1
   %43 = load i8* @sc, align 1
-  %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic
+  %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
   %45 = icmp eq i8 %44, %42
   %46 = zext i1 %45 to i8
   %47 = zext i8 %46 to i32
   store i32 %47, i32* @ui, align 4
   %48 = load i8* @uc, align 1
   %49 = load i8* @sc, align 1
-  %50 = cmpxchg i8* @uc, i8 %48, i8 %49 monotonic
+  %50 = cmpxchg i8* @uc, i8 %48, i8 %49 monotonic monotonic
   %51 = icmp eq i8 %50, %48
   %52 = zext i1 %51 to i8
   %53 = zext i8 %52 to i32
@@ -603,7 +603,7 @@ entry:
   %56 = load i8* @sc, align 1
   %57 = sext i8 %56 to i16
   %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %59 = cmpxchg i16* %58, i16 %55, i16 %57 monotonic
+  %59 = cmpxchg i16* %58, i16 %55, i16 %57 monotonic monotonic
   %60 = icmp eq i16 %59, %55
   %61 = zext i1 %60 to i8
   %62 = zext i8 %61 to i32
@@ -613,7 +613,7 @@ entry:
   %65 = load i8* @sc, align 1
   %66 = sext i8 %65 to i16
   %67 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %68 = cmpxchg i16* %67, i16 %64, i16 %66 monotonic
+  %68 = cmpxchg i16* %67, i16 %64, i16 %66 monotonic monotonic
   %69 = icmp eq i16 %68, %64
   %70 = zext i1 %69 to i8
   %71 = zext i8 %70 to i32
@@ -623,7 +623,7 @@ entry:
   %74 = load i8* @sc, align 1
   %75 = sext i8 %74 to i32
   %76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %77 = cmpxchg i32* %76, i32 %73, i32 %75 monotonic
+  %77 = cmpxchg i32* %76, i32 %73, i32 %75 monotonic monotonic
   %78 = icmp eq i32 %77, %73
   %79 = zext i1 %78 to i8
   %80 = zext i8 %79 to i32
@@ -633,7 +633,7 @@ entry:
   %83 = load i8* @sc, align 1
   %84 = sext i8 %83 to i32
   %85 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %86 = cmpxchg i32* %85, i32 %82, i32 %84 monotonic
+  %86 = cmpxchg i32* %85, i32 %82, i32 %84 monotonic monotonic
   %87 = icmp eq i32 %86, %82
   %88 = zext i1 %87 to i8
   %89 = zext i8 %88 to i32
@@ -643,7 +643,7 @@ entry:
   %92 = load i8* @sc, align 1
   %93 = sext i8 %92 to i64
   %94 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
-  %95 = cmpxchg i64* %94, i64 %91, i64 %93 monotonic
+  %95 = cmpxchg i64* %94, i64 %91, i64 %93 monotonic monotonic
   %96 = icmp eq i64 %95, %91
   %97 = zext i1 %96 to i8
   %98 = zext i8 %97 to i32
@@ -653,7 +653,7 @@ entry:
   %101 = load i8* @sc, align 1
   %102 = sext i8 %101 to i64
   %103 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
-  %104 = cmpxchg i64* %103, i64 %100, i64 %102 monotonic
+  %104 = cmpxchg i64* %103, i64 %100, i64 %102 monotonic monotonic
   %105 = icmp eq i64 %104, %100
   %106 = zext i1 %105 to i8
   %107 = zext i8 %106 to i32
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index 1737916375c..083df47e562 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -11,7 +11,7 @@ define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
 define i32 @exchange_and_cmp(i32* %mem) nounwind {
 ; CHECK-LABEL: exchange_and_cmp:
 ; CHECK: lwarx
-  %tmp = cmpxchg i32* %mem, i32 0, i32 1 monotonic
+  %tmp = cmpxchg i32* %mem, i32 0, i32 1 monotonic monotonic
 ; CHECK: stwcx.
 ; CHECK: stwcx.
   ret i32 %tmp
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index e56a7796671..261335e81d8 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -11,7 +11,7 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
 define i64 @exchange_and_cmp(i64* %mem) nounwind {
 ; CHECK-LABEL: exchange_and_cmp:
 ; CHECK: ldarx
-  %tmp = cmpxchg i64* %mem, i64 0, i64 1 monotonic
+  %tmp = cmpxchg i64* %mem, i64 0, i64 1 monotonic monotonic
 ; CHECK: stdcx.
 ; CHECK: stdcx.
   ret i64 %tmp
diff --git a/test/CodeGen/SPARC/atomics.ll b/test/CodeGen/SPARC/atomics.ll
index b10336c9808..4e3e7ae6fd4 100644
--- a/test/CodeGen/SPARC/atomics.ll
+++ b/test/CodeGen/SPARC/atomics.ll
@@ -38,7 +38,7 @@ entry:
 
 define i32 @test_cmpxchg_i32(i32 %a, i32* %ptr) {
 entry:
-  %b = cmpxchg i32* %ptr, i32 %a, i32 123 monotonic
+  %b = cmpxchg i32* %ptr, i32 %a, i32 123 monotonic monotonic
   ret i32 %b
 }
 
@@ -48,7 +48,7 @@ entry:
 
 define i64 @test_cmpxchg_i64(i64 %a, i64* %ptr) {
 entry:
-  %b = cmpxchg i64* %ptr, i64 %a, i64 123 monotonic
+  %b = cmpxchg i64* %ptr, i64 %a, i64 123 monotonic monotonic
   ret i64 %b
 }
 
diff --git a/test/CodeGen/SystemZ/cmpxchg-01.ll b/test/CodeGen/SystemZ/cmpxchg-01.ll
index d5ea9778690..bb0b18ad57c 100644
--- a/test/CodeGen/SystemZ/cmpxchg-01.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-01.ll
@@ -32,7 +32,7 @@ define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) {
 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT: rll
 ; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -8([[NEGSHIFT]])
-  %res = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst
+  %res = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst
   ret i8 %res
 }
 
@@ -50,6 +50,6 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT: risbg
 ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 55, 0
 ; CHECK-SHIFT: br %r14
-  %res = cmpxchg i8 *%src, i8 42, i8 88 seq_cst
+  %res = cmpxchg i8 *%src, i8 42, i8 88 seq_cst seq_cst
   ret i8 %res
 }
diff --git a/test/CodeGen/SystemZ/cmpxchg-02.ll b/test/CodeGen/SystemZ/cmpxchg-02.ll
index 08c79d717c1..8d46a8c0736 100644
--- a/test/CodeGen/SystemZ/cmpxchg-02.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-02.ll
@@ -32,7 +32,7 @@ define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) {
 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT: rll
 ; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -16([[NEGSHIFT]])
-  %res = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst
+  %res = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst
   ret i16 %res
 }
 
@@ -50,6 +50,6 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT: risbg
 ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 47, 0
 ; CHECK-SHIFT: br %r14
-  %res = cmpxchg i16 *%src, i16 42, i16 88 seq_cst
+  %res = cmpxchg i16 *%src, i16 42, i16 88 seq_cst seq_cst
   ret i16 %res
 }
diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll
index 3917979ac24..f6a2ad0b691 100644
--- a/test/CodeGen/SystemZ/cmpxchg-03.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-03.ll
@@ -7,7 +7,7 @@ define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK-LABEL: f1:
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -17,7 +17,7 @@ define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: cs %r2, %r3, 4092(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -27,7 +27,7 @@ define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, 4096(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -37,7 +37,7 @@ define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -49,7 +49,7 @@ define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131072
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -59,7 +59,7 @@ define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -69,7 +69,7 @@ define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: csy %r2, %r3, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -81,7 +81,7 @@ define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) {
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131073
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -93,7 +93,7 @@ define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
   %ptr = inttoptr i64 %add1 to i32 *
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -106,7 +106,7 @@ define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -116,7 +116,7 @@ define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) {
 ; CHECK: lhi %r2, 1001
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i32 *%ptr, i32 1001, i32 %swap seq_cst
+  %val = cmpxchg i32 *%ptr, i32 1001, i32 %swap seq_cst seq_cst
   ret i32 %val
 }
 
@@ -126,6 +126,6 @@ define i32 @f12(i32 %cmp, i32 *%ptr) {
 ; CHECK: lhi [[SWAP:%r[0-9]+]], 1002
 ; CHECK: cs %r2, [[SWAP]], 0(%r3)
 ; CHECK: br %r14
-  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 1002 seq_cst
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 1002 seq_cst seq_cst
   ret i32 %val
 }
diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll
index f58868f04f2..069bad65144 100644
--- a/test/CodeGen/SystemZ/cmpxchg-04.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-04.ll
@@ -7,7 +7,7 @@ define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK-LABEL: f1:
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -17,7 +17,7 @@ define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -29,7 +29,7 @@ define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65536
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -39,7 +39,7 @@ define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -49,7 +49,7 @@ define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -61,7 +61,7 @@ define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) {
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65537
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -73,7 +73,7 @@ define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) {
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
   %ptr = inttoptr i64 %add1 to i64 *
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -83,7 +83,7 @@ define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) {
 ; CHECK: lghi %r2, 1001
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
-  %val = cmpxchg i64 *%ptr, i64 1001, i64 %swap seq_cst
+  %val = cmpxchg i64 *%ptr, i64 1001, i64 %swap seq_cst seq_cst
   ret i64 %val
 }
 
@@ -93,6 +93,6 @@ define i64 @f9(i64 %cmp, i64 *%ptr) {
 ; CHECK: lghi [[SWAP:%r[0-9]+]], 1002
 ; CHECK: csg %r2, [[SWAP]], 0(%r3)
 ; CHECK: br %r14
-  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 1002 seq_cst
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 1002 seq_cst seq_cst
   ret i64 %val
 }
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
index 0e4118a2a91..f69cedc4d37 100644
--- a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -18,7 +18,7 @@ entry:
 loop:
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg8b
-  %r = cmpxchg i64* %ptr, i64 0, i64 1 monotonic
+  %r = cmpxchg i64* %ptr, i64 0, i64 1 monotonic monotonic
   %stored1  = icmp eq i64 %r, 0
   br i1 %stored1, label %loop, label %continue
 continue:
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index 8b0a349a8be..c2746885044 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -704,7 +704,7 @@ entry:
   %3 = zext i8 %2 to i32
   %4 = trunc i32 %3 to i8
   %5 = trunc i32 %1 to i8
-  %6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic
+  %6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
   store i8 %6, i8* @sc, align 1
   %7 = load i8* @sc, align 1
   %8 = zext i8 %7 to i32
@@ -712,7 +712,7 @@ entry:
   %10 = zext i8 %9 to i32
   %11 = trunc i32 %10 to i8
   %12 = trunc i32 %8 to i8
-  %13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic
+  %13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
   store i8 %13, i8* @uc, align 1
   %14 = load i8* @sc, align 1
   %15 = sext i8 %14 to i16
@@ -722,7 +722,7 @@ entry:
   %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %20 = trunc i32 %18 to i16
   %21 = trunc i32 %16 to i16
-  %22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic
+  %22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
   store i16 %22, i16* @ss, align 2
   %23 = load i8* @sc, align 1
   %24 = sext i8 %23 to i16
@@ -732,49 +732,49 @@ entry:
   %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %29 = trunc i32 %27 to i16
   %30 = trunc i32 %25 to i16
-  %31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic
+  %31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
   store i16 %31, i16* @us, align 2
   %32 = load i8* @sc, align 1
   %33 = sext i8 %32 to i32
   %34 = load i8* @uc, align 1
   %35 = zext i8 %34 to i32
   %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic
+  %37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
   store i32 %37, i32* @si, align 4
   %38 = load i8* @sc, align 1
   %39 = sext i8 %38 to i32
   %40 = load i8* @uc, align 1
   %41 = zext i8 %40 to i32
   %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic
+  %43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
   store i32 %43, i32* @ui, align 4
   %44 = load i8* @sc, align 1
   %45 = sext i8 %44 to i64
   %46 = load i8* @uc, align 1
   %47 = zext i8 %46 to i64
   %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
-  %49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic
+  %49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
   store i64 %49, i64* @sl, align 8
   %50 = load i8* @sc, align 1
   %51 = sext i8 %50 to i64
   %52 = load i8* @uc, align 1
   %53 = zext i8 %52 to i64
   %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
-  %55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic
+  %55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
   store i64 %55, i64* @ul, align 8
   %56 = load i8* @sc, align 1
   %57 = sext i8 %56 to i64
   %58 = load i8* @uc, align 1
   %59 = zext i8 %58 to i64
   %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
-  %61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic
+  %61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
   store i64 %61, i64* @sll, align 8
   %62 = load i8* @sc, align 1
   %63 = sext i8 %62 to i64
   %64 = load i8* @uc, align 1
   %65 = zext i8 %64 to i64
   %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
-  %67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic
+  %67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
   store i64 %67, i64* @ull, align 8
   %68 = load i8* @sc, align 1
   %69 = zext i8 %68 to i32
@@ -782,7 +782,7 @@ entry:
   %71 = zext i8 %70 to i32
   %72 = trunc i32 %71 to i8
   %73 = trunc i32 %69 to i8
-  %74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic
+  %74 = cmpxchg i8* @sc, i8 %72, i8 %73 monotonic monotonic
   %75 = icmp eq i8 %74, %72
   %76 = zext i1 %75 to i8
   %77 = zext i8 %76 to i32
@@ -793,7 +793,7 @@ entry:
   %81 = zext i8 %80 to i32
   %82 = trunc i32 %81 to i8
   %83 = trunc i32 %79 to i8
-  %84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic
+  %84 = cmpxchg i8* @uc, i8 %82, i8 %83 monotonic monotonic
   %85 = icmp eq i8 %84, %82
   %86 = zext i1 %85 to i8
   %87 = zext i8 %86 to i32
@@ -805,7 +805,7 @@ entry:
   %92 = zext i8 %91 to i32
   %93 = trunc i32 %92 to i8
   %94 = trunc i32 %90 to i8
-  %95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic
+  %95 = cmpxchg i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 monotonic monotonic
   %96 = icmp eq i8 %95, %93
   %97 = zext i1 %96 to i8
   %98 = zext i8 %97 to i32
@@ -817,7 +817,7 @@ entry:
   %103 = zext i8 %102 to i32
   %104 = trunc i32 %103 to i8
   %105 = trunc i32 %101 to i8
-  %106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic
+  %106 = cmpxchg i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 monotonic monotonic
   %107 = icmp eq i8 %106, %104
   %108 = zext i1 %107 to i8
   %109 = zext i8 %108 to i32
@@ -828,7 +828,7 @@ entry:
   %113 = zext i8 %112 to i32
   %114 = trunc i32 %113 to i8
   %115 = trunc i32 %111 to i8
-  %116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic
+  %116 = cmpxchg i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 monotonic monotonic
   %117 = icmp eq i8 %116, %114
   %118 = zext i1 %117 to i8
   %119 = zext i8 %118 to i32
@@ -839,7 +839,7 @@ entry:
   %123 = zext i8 %122 to i32
   %124 = trunc i32 %123 to i8
   %125 = trunc i32 %121 to i8
-  %126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic
+  %126 = cmpxchg i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 monotonic monotonic
   %127 = icmp eq i8 %126, %124
   %128 = zext i1 %127 to i8
   %129 = zext i8 %128 to i32
@@ -850,7 +850,7 @@ entry:
   %133 = zext i8 %132 to i64
   %134 = trunc i64 %133 to i8
   %135 = trunc i64 %131 to i8
-  %136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic
+  %136 = cmpxchg i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 monotonic monotonic
   %137 = icmp eq i8 %136, %134
   %138 = zext i1 %137 to i8
   %139 = zext i8 %138 to i32
@@ -861,7 +861,7 @@ entry:
   %143 = zext i8 %142 to i64
   %144 = trunc i64 %143 to i8
   %145 = trunc i64 %141 to i8
-  %146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic
+  %146 = cmpxchg i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 monotonic monotonic
   %147 = icmp eq i8 %146, %144
   %148 = zext i1 %147 to i8
   %149 = zext i8 %148 to i32
@@ -872,7 +872,7 @@ entry:
   %153 = zext i8 %152 to i64
   %154 = trunc i64 %153 to i8
   %155 = trunc i64 %151 to i8
-  %156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic
+  %156 = cmpxchg i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 monotonic monotonic
   %157 = icmp eq i8 %156, %154
   %158 = zext i1 %157 to i8
   %159 = zext i8 %158 to i32
@@ -883,7 +883,7 @@ entry:
   %163 = zext i8 %162 to i64
   %164 = trunc i64 %163 to i8
   %165 = trunc i64 %161 to i8
-  %166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic
+  %166 = cmpxchg i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 monotonic monotonic
   %167 = icmp eq i8 %166, %164
   %168 = zext i1 %167 to i8
   %169 = zext i8 %168 to i32
diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll
index ec2887e29f8..45d3ff46a04 100644
--- a/test/CodeGen/X86/atomic16.ll
+++ b/test/CodeGen/X86/atomic16.ll
@@ -217,7 +217,7 @@ define void @atomic_fetch_umin16(i16 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg16() nounwind {
-  %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire
+  %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgw
 ; X32:       lock
diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll
index 3cb9ca1c76c..474c0e6a985 100644
--- a/test/CodeGen/X86/atomic32.ll
+++ b/test/CodeGen/X86/atomic32.ll
@@ -243,7 +243,7 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg32() nounwind {
-  %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire
+  %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgl
 ; X32:       lock
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index aa000455753..4f55edc0567 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -183,7 +183,7 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg64() nounwind {
-  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgq
 ; X32:       lock
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
index 31e66c876e3..c0f7267abe7 100644
--- a/test/CodeGen/X86/atomic6432.ll
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -184,7 +184,7 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg64() nounwind {
-  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
 ; X32:       lock
 ; X32:       cmpxchg8b
   ret void
diff --git a/test/CodeGen/X86/atomic8.ll b/test/CodeGen/X86/atomic8.ll
index 3278ed1f504..203b26f0ab9 100644
--- a/test/CodeGen/X86/atomic8.ll
+++ b/test/CodeGen/X86/atomic8.ll
@@ -217,7 +217,7 @@ define void @atomic_fetch_umin8(i8 %x) nounwind {
 }
 
 define void @atomic_fetch_cmpxchg8() nounwind {
-  %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire
+  %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire acquire
 ; X64:       lock
 ; X64:       cmpxchgb
 ; X32:       lock
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index a378d6e8d68..b3045ed645b 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -101,11 +101,11 @@ entry:
 	%neg1 = sub i32 0, 10		; <i32> [#uses=1]
         ; CHECK: lock
         ; CHECK: cmpxchgl
-  %16 = cmpxchg i32* %val2, i32 %neg1, i32 1 monotonic
+  %16 = cmpxchg i32* %val2, i32 %neg1, i32 1 monotonic monotonic
 	store i32 %16, i32* %old
         ; CHECK: lock
         ; CHECK: cmpxchgl
-  %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
+  %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic monotonic
 	store i32 %17, i32* %old
         ; CHECK: movl  [[R17atomic:.*]], %eax
         ; CHECK: movl	$1401, %[[R17mask:[a-z]*]]
@@ -133,6 +133,6 @@ entry:
 ; CHECK: lock
 ; CHECK:	cmpxchgl	%{{.*}}, %gs:(%{{.*}})
 
-  %0 = cmpxchg i32 addrspace(256)* %P, i32 0, i32 1 monotonic
+  %0 = cmpxchg i32 addrspace(256)* %P, i32 0, i32 1 monotonic monotonic
   ret void
 }
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index edbd0bc9ded..1d5bb85f8d2 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -6,7 +6,7 @@ entry:
 ; CHECK: movl	$1, %ebx
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg16b
-  %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst
+  %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst seq_cst
   ret void
 }
 
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index eb7b7a8738a..468b70bdc86 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -5,7 +5,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = cmpxchg i64* @val, i64 0, i64 1 monotonic
+  %0 = cmpxchg i64* @val, i64 0, i64 1 monotonic monotonic
   %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind
   ret i32 0
 }
diff --git a/test/CodeGen/X86/nocx16.ll b/test/CodeGen/X86/nocx16.ll
index cceaac47122..8b995dafa75 100644
--- a/test/CodeGen/X86/nocx16.ll
+++ b/test/CodeGen/X86/nocx16.ll
@@ -2,7 +2,7 @@
 define void @test(i128* %a) nounwind {
 entry:
 ; CHECK: __sync_val_compare_and_swap_16
-  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst
+  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst
 ; CHECK: __sync_lock_test_and_set_16
   %1 = atomicrmw xchg i128* %a, i128 1 seq_cst
 ; CHECK: __sync_fetch_and_add_16
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
index 5b30fae714f..6390644422c 100644
--- a/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -26,7 +26,7 @@ entry:
 
 define void @example_cmpxchg(i64* %ptr, i64 %compare_to, i64 %new_value) nounwind uwtable sanitize_address {
 entry:
-  %0 = cmpxchg i64* %ptr, i64 %compare_to, i64 %new_value seq_cst
+  %0 = cmpxchg i64* %ptr, i64 %compare_to, i64 %new_value seq_cst seq_cst
   ret void
 }
 
diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll
index ff0245262cb..98697d70382 100644
--- a/test/Instrumentation/MemorySanitizer/atomics.ll
+++ b/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -37,7 +37,7 @@ entry:
 
 define i32 @Cmpxchg(i32* %p, i32 %a, i32 %b) sanitize_memory {
 entry:
-  %0 = cmpxchg i32* %p, i32 %a, i32 %b seq_cst
+  %0 = cmpxchg i32* %p, i32 %a, i32 %b seq_cst seq_cst
   ret i32 %0
 }
 
@@ -46,16 +46,16 @@ entry:
 ; CHECK: icmp
 ; CHECK: br
 ; CHECK: @__msan_warning
-; CHECK: cmpxchg {{.*}} seq_cst
+; CHECK: cmpxchg {{.*}} seq_cst seq_cst
 ; CHECK: store i32 0, {{.*}} @__msan_retval_tls
 ; CHECK: ret i32
 
 
-; relaxed cmpxchg: bump up to "release"
+; relaxed cmpxchg: bump up to "release monotonic"
 
 define i32 @CmpxchgMonotonic(i32* %p, i32 %a, i32 %b) sanitize_memory {
 entry:
-  %0 = cmpxchg i32* %p, i32 %a, i32 %b monotonic
+  %0 = cmpxchg i32* %p, i32 %a, i32 %b monotonic monotonic
   ret i32 %0
 }
 
@@ -64,7 +64,7 @@ entry:
 ; CHECK: icmp
 ; CHECK: br
 ; CHECK: @__msan_warning
-; CHECK: cmpxchg {{.*}} release
+; CHECK: cmpxchg {{.*}} release monotonic
 ; CHECK: store i32 0, {{.*}} @__msan_retval_tls
 ; CHECK: ret i32
 
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
index 70b6cbbf310..e40268f97b9 100644
--- a/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -348,7 +348,7 @@ entry:
 
 define void @atomic8_cas_monotonic(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 monotonic
+  cmpxchg i8* %a, i8 0, i8 1 monotonic monotonic
   ret void
 }
 ; CHECK: atomic8_cas_monotonic
@@ -356,7 +356,7 @@ entry:
 
 define void @atomic8_cas_acquire(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 acquire
+  cmpxchg i8* %a, i8 0, i8 1 acquire acquire
   ret void
 }
 ; CHECK: atomic8_cas_acquire
@@ -364,7 +364,7 @@ entry:
 
 define void @atomic8_cas_release(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 release
+  cmpxchg i8* %a, i8 0, i8 1 release monotonic
   ret void
 }
 ; CHECK: atomic8_cas_release
@@ -372,7 +372,7 @@ entry:
 
 define void @atomic8_cas_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 acq_rel
+  cmpxchg i8* %a, i8 0, i8 1 acq_rel acquire
   ret void
 }
 ; CHECK: atomic8_cas_acq_rel
@@ -380,7 +380,7 @@ entry:
 
 define void @atomic8_cas_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 seq_cst
+  cmpxchg i8* %a, i8 0, i8 1 seq_cst seq_cst
   ret void
 }
 ; CHECK: atomic8_cas_seq_cst
@@ -732,7 +732,7 @@ entry:
 
 define void @atomic16_cas_monotonic(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 monotonic
+  cmpxchg i16* %a, i16 0, i16 1 monotonic monotonic
   ret void
 }
 ; CHECK: atomic16_cas_monotonic
@@ -740,7 +740,7 @@ entry:
 
 define void @atomic16_cas_acquire(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 acquire
+  cmpxchg i16* %a, i16 0, i16 1 acquire acquire
   ret void
 }
 ; CHECK: atomic16_cas_acquire
@@ -748,7 +748,7 @@ entry:
 
 define void @atomic16_cas_release(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 release
+  cmpxchg i16* %a, i16 0, i16 1 release monotonic
   ret void
 }
 ; CHECK: atomic16_cas_release
@@ -756,7 +756,7 @@ entry:
 
 define void @atomic16_cas_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 acq_rel
+  cmpxchg i16* %a, i16 0, i16 1 acq_rel acquire
   ret void
 }
 ; CHECK: atomic16_cas_acq_rel
@@ -764,7 +764,7 @@ entry:
 
 define void @atomic16_cas_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 seq_cst
+  cmpxchg i16* %a, i16 0, i16 1 seq_cst seq_cst
   ret void
 }
 ; CHECK: atomic16_cas_seq_cst
@@ -1116,7 +1116,7 @@ entry:
 
 define void @atomic32_cas_monotonic(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 monotonic
+  cmpxchg i32* %a, i32 0, i32 1 monotonic monotonic
   ret void
 }
 ; CHECK: atomic32_cas_monotonic
@@ -1124,7 +1124,7 @@ entry:
 
 define void @atomic32_cas_acquire(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 acquire
+  cmpxchg i32* %a, i32 0, i32 1 acquire acquire
   ret void
 }
 ; CHECK: atomic32_cas_acquire
@@ -1132,7 +1132,7 @@ entry:
 
 define void @atomic32_cas_release(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 release
+  cmpxchg i32* %a, i32 0, i32 1 release monotonic
   ret void
 }
 ; CHECK: atomic32_cas_release
@@ -1140,7 +1140,7 @@ entry:
 
 define void @atomic32_cas_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 acq_rel
+  cmpxchg i32* %a, i32 0, i32 1 acq_rel acquire
   ret void
 }
 ; CHECK: atomic32_cas_acq_rel
@@ -1148,7 +1148,7 @@ entry:
 
 define void @atomic32_cas_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 seq_cst
+  cmpxchg i32* %a, i32 0, i32 1 seq_cst seq_cst
   ret void
 }
 ; CHECK: atomic32_cas_seq_cst
@@ -1500,7 +1500,7 @@ entry:
 
 define void @atomic64_cas_monotonic(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 monotonic
+  cmpxchg i64* %a, i64 0, i64 1 monotonic monotonic
   ret void
 }
 ; CHECK: atomic64_cas_monotonic
@@ -1508,7 +1508,7 @@ entry:
 
 define void @atomic64_cas_acquire(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 acquire
+  cmpxchg i64* %a, i64 0, i64 1 acquire acquire
   ret void
 }
 ; CHECK: atomic64_cas_acquire
@@ -1516,7 +1516,7 @@ entry:
 
 define void @atomic64_cas_release(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 release
+  cmpxchg i64* %a, i64 0, i64 1 release monotonic
   ret void
 }
 ; CHECK: atomic64_cas_release
@@ -1524,7 +1524,7 @@ entry:
 
 define void @atomic64_cas_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 acq_rel
+  cmpxchg i64* %a, i64 0, i64 1 acq_rel acquire
   ret void
 }
 ; CHECK: atomic64_cas_acq_rel
@@ -1532,7 +1532,7 @@ entry:
 
 define void @atomic64_cas_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 seq_cst
+  cmpxchg i64* %a, i64 0, i64 1 seq_cst seq_cst
   ret void
 }
 ; CHECK: atomic64_cas_seq_cst
@@ -1884,7 +1884,7 @@ entry:
 
 define void @atomic128_cas_monotonic(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 monotonic
+  cmpxchg i128* %a, i128 0, i128 1 monotonic monotonic
   ret void
 }
 ; CHECK: atomic128_cas_monotonic
@@ -1892,7 +1892,7 @@ entry:
 
 define void @atomic128_cas_acquire(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 acquire
+  cmpxchg i128* %a, i128 0, i128 1 acquire acquire
   ret void
 }
 ; CHECK: atomic128_cas_acquire
@@ -1900,7 +1900,7 @@ entry:
 
 define void @atomic128_cas_release(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 release
+  cmpxchg i128* %a, i128 0, i128 1 release monotonic
   ret void
 }
 ; CHECK: atomic128_cas_release
@@ -1908,7 +1908,7 @@ entry:
 
 define void @atomic128_cas_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 acq_rel
+  cmpxchg i128* %a, i128 0, i128 1 acq_rel acquire
   ret void
 }
 ; CHECK: atomic128_cas_acq_rel
@@ -1916,7 +1916,7 @@ entry:
 
 define void @atomic128_cas_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 seq_cst
+  cmpxchg i128* %a, i128 0, i128 1 seq_cst seq_cst
   ret void
 }
 ; CHECK: atomic128_cas_seq_cst
diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll
index 4331677764b..c319834b627 100644
--- a/test/Transforms/LowerAtomic/atomic-swap.ll
+++ b/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -3,7 +3,7 @@
 define i8 @cmpswap() {
 ; CHECK-LABEL: @cmpswap(
   %i = alloca i8
-  %j = cmpxchg i8* %i, i8 0, i8 42 monotonic
+  %j = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
 ; CHECK-NEXT: icmp
 ; CHECK-NEXT: select
diff --git a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
index e9d93e834a5..5ae62af5458 100644
--- a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
+++ b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
@@ -65,7 +65,7 @@ define void @test5(i1 %C, i32* %P) {
 entry:
   br i1 %C, label %T, label %F
 T:
-  cmpxchg volatile i32* %P, i32 0, i32 1 seq_cst
+  cmpxchg volatile i32* %P, i32 0, i32 1 seq_cst seq_cst
   unreachable
 F:
   ret void
-- 
2.34.1