From: Tim Northover Date: Fri, 30 May 2014 10:09:59 +0000 (+0000) Subject: ARM & AArch64: make use of common cmpxchg idioms after expansion X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d0dbe02fd26d47ed32c9e04a2a0385b26a49f4ed;p=oota-llvm.git ARM & AArch64: make use of common cmpxchg idioms after expansion The C and C++ semantics for compare_exchange require it to return a bool indicating success. This gets mapped to LLVM IR which follows each cmpxchg with an icmp of the value loaded against the desired value. When lowered to ldxr/stxr loops, this extra comparison is redundant: its results are implicit in the control-flow of the function. This commit makes two changes: it replaces that icmp with appropriate PHI nodes, and then makes sure earlyCSE is called after expansion to actually make use of the opportunities revealed. I've also added -{arm,aarch64}-enable-atomic-tidy options, so that existing fragile tests aren't perturbed too much by the change. Many of them either rely on undef/unreachable too pervasively to be restored to something well-defined (particularly while making sure they test the same obscure assert from many years ago), or depend on a particular CFG shape, which is disrupted by SimplifyCFG. rdar://problem/16227836 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209883 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp index d995333971b..d6d9907be7e 100644 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -300,12 +300,50 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB); - // Finally, make sure later instructions don't get reordered with a fence if - // necessary. + // Make sure later instructions don't get reordered with a fence if necessary. Builder.SetInsertPoint(BarrierBB); insertTrailingFence(Builder, SuccessOrder); Builder.CreateBr(ExitBB); + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + + // Setup the builder so we can create any PHIs we need. + Builder.SetInsertPoint(FailureBB, FailureBB->begin()); + BasicBlock *SuccessBB = FailureOrder == Monotonic ? BarrierBB : TryStoreBB; + PHINode *Success = 0, *Failure = 0; + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + for (auto User : CI->users()) { + ICmpInst *ICmp = dyn_cast(User); + if (!ICmp) + continue; + + // Because we know ICmp uses CI, we only need one operand to be the old + // value. + if (ICmp->getOperand(0) != CI->getCompareOperand() && + ICmp->getOperand(1) != CI->getCompareOperand()) + continue; + + if (ICmp->getPredicate() == CmpInst::ICMP_EQ) { + if (!Success) { + Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), LoopBB); + } + ICmp->replaceAllUsesWith(Success); + } else if (ICmp->getPredicate() == CmpInst::ICMP_NE) { + if (!Failure) { + Failure = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Failure->addIncoming(ConstantInt::getFalse(Ctx), SuccessBB); + Failure->addIncoming(ConstantInt::getTrue(Ctx), LoopBB); + } + ICmp->replaceAllUsesWith(Failure); + } + } + CI->replaceAllUsesWith(Loaded); CI->eraseFromParent(); diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 0b5dd2f067e..ba301e95538 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -53,6 +53,12 @@ static cl::opt EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden); +static cl::opt +EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden, + cl::desc("Run SimplifyCFG after expanding atomic operations" + " to make use of cmpxchg flow-based information"), + cl::init(true)); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. RegisterTargetMachine X(TheAArch64leTarget); @@ -113,6 +119,7 @@ public: return getTM(); } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; bool addILPOpts() override; @@ -135,6 +142,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { return new AArch64PassConfig(this, PM); } +void AArch64PassConfig::addIRPasses() { + // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg + // ourselves. + addPass(createAtomicExpandLoadLinkedPass(TM)); + + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass()); + + TargetPassConfig::addIRPasses(); +} + // Pass Pipeline Configuration bool AArch64PassConfig::addPreISel() { // Run promote constant before global merge, so that the promoted constants @@ -146,10 +167,6 @@ bool AArch64PassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); - // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg - // ourselves. - addPass(createAtomicExpandLoadLinkedPass(TM)); - return false; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 887622705ed..6ef2ea4e103 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, cl::desc("Inhibit optimization of S->D register accesses on A15"), cl::init(false)); +static cl::opt +EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden, + cl::desc("Run SimplifyCFG after expanding atomic operations" + " to make use of cmpxchg flow-based information"), + cl::init(true)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine X(TheARMLETarget); @@ -213,6 +219,7 @@ public: return *getARMTargetMachine().getSubtargetImpl(); } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; bool addPreRegAlloc() override; @@ -225,11 +232,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(this, PM); } -bool ARMPassConfig::addPreISel() { +void ARMPassConfig::addIRPasses() { const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { addPass(createAtomicExpandLoadLinkedPass(TM)); + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass()); + } + + TargetPassConfig::addIRPasses(); +} + +bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index a2266b1d36d..ceea8a08ece 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s @var8 = global i8 0 @var16 = global i16 0 diff --git a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll index cf6235570a4..38661a5f38f 100644 --- a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll +++ b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -o - | FileCheck %s +; RUN: llc %s -o - -aarch64-atomic-cfg-tidy=0 | FileCheck %s ; Check that ANDS (tst) is not merged with ADD when the immediate ; is not 0. ; diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll index bb14c895504..5d62cfe76a8 100644 --- a/test/CodeGen/AArch64/arm64-cse.ll +++ b/test/CodeGen/AArch64/arm64-cse.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 < %s | FileCheck %s +; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s target triple = "arm64-apple-ios" ; rdar://12462006 diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll index 17d783a488f..44150c29aeb 100644 --- a/test/CodeGen/AArch64/arm64-early-ifcvt.ll +++ b/test/CodeGen/AArch64/arm64-early-ifcvt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -stress-early-ifcvt | FileCheck %s +; RUN: llc < %s -stress-early-ifcvt -aarch64-atomic-cfg-tidy=0 | FileCheck %s target triple = "arm64-apple-macosx" ; CHECK: mm2 diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll index 57bbb93e12b..b1d50102aa2 100644 --- a/test/CodeGen/AArch64/arm64-fp128.ll +++ b/test/CodeGen/AArch64/arm64-fp128.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s @lhs = global fp128 zeroinitializer, align 16 @rhs = global fp128 zeroinitializer, align 16 diff --git a/test/CodeGen/AArch64/arm64-frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll index 4a91ff31d8c..321f3354ca2 100644 --- a/test/CodeGen/AArch64/arm64-frame-index.ll +++ b/test/CodeGen/AArch64/arm64-frame-index.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -aarch64-atomic-cfg-tidy=0 < %s | FileCheck %s ; rdar://11935841 define void @t1() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll index 6cffbdeef8a..0c300de802b 100644 --- a/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/test/CodeGen/AArch64/arm64-xaluo.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-atomic-cfg-tidy=0 | FileCheck %s ; ; Get the actual value of the overflow bit. diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index 58b5d1d078c..36d0eda1e12 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -889,8 +889,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. -; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -911,8 +910,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. -; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb @@ -933,8 +931,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; function there. ; CHECK-NEXT: cmp w[[OLD]], w0 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] - ; As above, w1 is a reasonable guess. -; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] ; CHECK-NOT: dmb diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll index 1eec4cc7f4e..3a5dbdc945c 100644 --- a/test/CodeGen/AArch64/blockaddress.ll +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s @addr = global i8* null diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll index 591f48303e2..952404495ce 100644 --- a/test/CodeGen/AArch64/breg.ll +++ b/test/CodeGen/AArch64/breg.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s @stored_label = global i8* null diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll new file mode 100644 index 00000000000..a6b96af99dd --- /dev/null +++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -0,0 +1,89 @@ +; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s + +define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { +; CHECK-LABEL: test_return: + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0] +; CHECK: cmp [[LOADED]], w1 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0] +; CHECK: cbnz [[STATUS]], [[LOOP]] + +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: orr w0, wzr, #0x1 +; CHECK: ret + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: mov w0, wzr +; CHECK: ret + + %loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst + %success = icmp eq i32 %loaded, %oldval + %conv = zext i1 %success to i32 + ret i32 %conv +} + +define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { +; CHECK-LABEL: test_return_bool: + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxrb [[LOADED:w[0-9]+]], [x0] +; CHECK: cmp [[LOADED]], w1, uxtb +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x0] +; CHECK: cbnz [[STATUS]], [[LOOP]] + +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: mov w0, wzr +; CHECK: ret + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: orr w0, wzr, #0x1 +; CHECK: ret + + %loaded = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic + %failure = icmp ne i8 %loaded, %oldValue + ret i1 %failure +} + +define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) { +; CHECK-LABEL: test_conditional: + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x0] +; CHECK: cmp [[LOADED]], w1 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], w2, [x0] +; CHECK: cbnz [[STATUS]], [[LOOP]] + +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: b _bar + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: b _baz + + %loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst + %success = icmp eq i32 %loaded, %oldval + br i1 %success, label %true, label %false + +true: + tail call void @bar() #2 + br label %end + +false: + tail call void @baz() #2 + br label %end + +end: + ret void +} + +declare void @bar() +declare void @baz() diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index 1b519284846..fbea4a6e583 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index c9b0b9ff7d8..77bbcddc492 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting ; instruction at different points of a routine. Either by rematerializing the diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index 1dfb789ac8e..69fbd9972b8 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index 5518db3275e..a0de6342cb8 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s ; This file contains tests for the AArch64 load/store optimizer. diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll index 8a2fe26803e..5dc7b5df475 100644 --- a/test/CodeGen/AArch64/tst-br.ll +++ b/test/CodeGen/AArch64/tst-br.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -aarch64-atomic-cfg-tidy=0 | FileCheck %s ; We've got the usual issues with LLVM reordering blocks here. The ; tests are correct for the current order, but who knows when that diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll index ca5ae8b62e8..2597b413ec7 100644 --- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll +++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s +; RUN: llc -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll index 4fb2be02ce9..38eb0ea2c89 100644 --- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll +++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s +; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp -arm-atomic-cfg-tidy=0 < %s | FileCheck %s ; PR5423 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll index 60379984fab..e7e0580179c 100644 --- a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll +++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s | FileCheck %s +; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=arm1136jf-s -arm-atomic-cfg-tidy=0 | FileCheck %s ; Radar 8589805: Counting the number of microcoded operations, such as for an ; LDM instruction, was causing an assertion failure because the microop count ; was being treated as an instruction count. diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll index 1bbb7b47a95..3950c9e081f 100644 --- a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll +++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic -mcpu=arm1136jf-s | FileCheck %s +; RUN: llc < %s -asm-verbose=false -O3 -mtriple=armv6-apple-darwin -relocation-model=pic -mcpu=arm1136jf-s -arm-atomic-cfg-tidy=0 | FileCheck %s ; rdar://8959122 illegal register operands for UMULL instruction ; in cfrac nightly test. ; Armv6 generates a umull that must write to two distinct destination regs. diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll index 8df295a2f65..33083303a3d 100644 --- a/test/CodeGen/ARM/2012-11-14-subs_carry.ll +++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s ;CHECK-LABEL: foo: ;CHECK: adds diff --git a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll index a438c1f4556..05a4ef05e95 100644 --- a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll +++ b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s ; ModuleID = 'bugpoint-reduced-simplified.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" target triple = "armv7--linux-gnueabi" diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index bf827d6b66e..14eef832e69 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -1,8 +1,8 @@ -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck -check-prefix=ARM %s -; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck -check-prefix=THUMB %s -; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ +; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s +; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s +; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ ; RUN: | FileCheck -check-prefix=T2 %s -; RUN: llc -mtriple=thumbv8-eabi %s -o - | FileCheck -check-prefix=V8 %s +; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index 9913f30712b..db092de3875 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -189,9 +189,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2 ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3 -; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]] +; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2 +; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3 +; CHECK-THUMB-LE: orrs [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB: bne ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index 40694bfca5c..a35fd747646 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 | FileCheck %s -check-prefix=CHECKV6 -; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D -; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 \ +; RUN: llc < %s -mtriple=armv6-apple-ios5.0 -mattr=+vfp2 -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=CHECKV6 +; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=CHECKT2D +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-atomic-cfg-tidy=0 \ ; RUN: | FileCheck %s -check-prefix=CHECKELF ; Enable tailcall optimization for iOS 5.0 diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll new file mode 100644 index 00000000000..a4f853450b3 --- /dev/null +++ b/test/CodeGen/ARM/cmpxchg-idioms.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s | FileCheck %s + +define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { +; CHECK-LABEL: test_return: + +; CHECK: dmb ishst + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldrex [[LOADED:r[0-9]+]], [r0] +; CHECK: cmp [[LOADED]], r1 +; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0] +; CHECK: cmp [[STATUS]], #0 +; CHECK: bne [[LOOP]] + +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: movs r0, #1 +; CHECK: dmb ish +; CHECK: bx lr + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: movs r0, #0 +; CHECK: dmb ish +; CHECK: bx lr + + %loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst + %success = icmp eq i32 %loaded, %oldval + %conv = zext i1 %success to i32 + ret i32 %conv +} + +define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { +; CHECK-LABEL: test_return_bool: + +; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1 +; CHECK: dmb ishst + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0] +; CHECK: cmp [[LOADED]], [[OLDBYTE]] + +; CHECK: itt ne +; CHECK: movne r0, #1 +; CHECK: bxne lr + +; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0] +; CHECK: cmp [[STATUS]], #0 +; CHECK: bne [[LOOP]] + +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: movs r0, #0 +; CHECK: bx lr + + %loaded = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic + %failure = icmp ne i8 %loaded, %oldValue + ret i1 %failure +} + +define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) { +; CHECK-LABEL: test_conditional: + +; CHECK: dmb ishst + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldrex [[LOADED:r[0-9]+]], [r0] +; CHECK: cmp [[LOADED]], r1 +; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: cmp [[STATUS]], #0 +; CHECK: bne [[LOOP]] + +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: dmb ish +; CHECK: b.w _bar + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: dmb ish +; CHECK: b.w _baz + + %loaded = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst + %success = icmp eq i32 %loaded, %oldval + br i1 %success, label %true, label %false + +true: + tail call void @bar() #2 + br label %end + +false: + tail call void @baz() #2 + br label %end + +end: + ret void +} + +declare void @bar() +declare void @baz() diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll index da70178225e..5eb81b24de0 100644 --- a/test/CodeGen/ARM/data-in-code-annotations.ll +++ b/test/CodeGen/ARM/data-in-code-annotations.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s define double @f1() nounwind { ; CHECK-LABEL: f1: diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll index c7217560f48..f50d0b96fe9 100644 --- a/test/CodeGen/ARM/fptoint.ll +++ b/test/CodeGen/ARM/fptoint.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+v6,+vfp2 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 -mattr=+v6,+vfp2 %s -o - | FileCheck %s @i = weak global i32 0 ; [#uses=2] @u = weak global i32 0 ; [#uses=2] diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll index cd8a5613862..a994d3d01ae 100644 --- a/test/CodeGen/ARM/ifcvt-branch-weight.ll +++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -o /dev/null 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -print-machineinstrs=if-converter -arm-atomic-cfg-tidy=0 -o /dev/null 2>&1 | FileCheck %s %struct.S = type { i8* (i8*)*, [1 x i8] } define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 26c72723b28..509c182fc97 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -arm-atomic-cfg-tidy=0 -mcpu=cortex-a9 | FileCheck %s ; rdar://8402126 ; Make sure if-converter is not predicating vldmia and ldmia. These are ; micro-coded and would have long issue latency even if predicated on diff --git a/test/CodeGen/ARM/indirectbr-3.ll b/test/CodeGen/ARM/indirectbr-3.ll index 5a9c45902ed..291fedb8110 100644 --- a/test/CodeGen/ARM/indirectbr-3.ll +++ b/test/CodeGen/ARM/indirectbr-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s ; If ARMBaseInstrInfo::AnalyzeBlocks returns the wrong value, which was possible ; for blocks with indirect branches, the IfConverter could end up deleting diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index 1dafa0067b5..3ad60d47b53 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -1,4 +1,4 @@ -; RUN: llc -regalloc=greedy < %s | FileCheck %s +; RUN: llc -regalloc=greedy -arm-atomic-cfg-tidy=0 < %s | FileCheck %s ; LSR shouldn't introduce more induction variables than needed, increasing ; register pressure and therefore spilling. There is more room for improvement diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll index 26adf0c2ad0..bb2d42ca9ed 100644 --- a/test/CodeGen/ARM/misched-copy-arm.ll +++ b/test/CodeGen/ARM/misched-copy-arm.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched %s -o - 2>&1 | FileCheck %s +; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched -arm-atomic-cfg-tidy=0 %s -o - 2>&1 | FileCheck %s ; ; Loop counter copies should be eliminated. ; There is also a MUL here, but we don't care where it is scheduled. diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index b245674c3c9..feed5ad2830 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 -regalloc=basic | FileCheck %s ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's. %struct.int16x8_t = type { <8 x i16> } diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index b9246635e40..4fa97ea5b68 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s ; PR4789 %bar = type { float, float, float } diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll index 465ee1218fd..0fd55ec6c94 100644 --- a/test/CodeGen/ARM/struct-byval-frame-index.ll +++ b/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs -arm-atomic-cfg-tidy=0 | FileCheck %s ; Check a spill right after a function call with large struct byval is correctly ; generated. diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll index 8da875fef27..01df3b42d10 100644 --- a/test/CodeGen/ARM/twoaddrinstr.ll +++ b/test/CodeGen/ARM/twoaddrinstr.ll @@ -1,5 +1,5 @@ ; Tests for the two-address instruction pass. -; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -arm-atomic-cfg-tidy=0 %s -o - | FileCheck %s define void @PR13378() nounwind { ; This was orriginally a crasher trying to schedule the instructions. diff --git a/test/CodeGen/ARM/vldm-sched-a9.ll b/test/CodeGen/ARM/vldm-sched-a9.ll index d0a9ac6d2b5..f2e5eb9b7e0 100644 --- a/test/CodeGen/ARM/vldm-sched-a9.ll +++ b/test/CodeGen/ARM/vldm-sched-a9.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s +; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -arm-atomic-cfg-tidy=0 -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64" diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll index e0144531454..09e0ed1ead6 100644 --- a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll +++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-atomic-cfg-tidy=0 | FileCheck %s @csize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1] @vsize = external global [100 x [20 x [4 x i8]]] ; <[100 x [20 x [4 x i8]]]*> [#uses=1] diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll index 940cfd15e08..c8eac8d4d09 100644 --- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll +++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -arm-atomic-cfg-tidy=0 | FileCheck %s ; PR4659 ; PR4682 diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll index 52066d3f86a..a9a2478e403 100644 --- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll +++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-cgp-branch-opts -arm-atomic-cfg-tidy=0 | FileCheck %s %struct.pix_pos = type { i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll index 1b8bdb1c19b..8beb5b1c894 100644 --- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll +++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 -O3 | FileCheck %s ; rdar://7493908 ; Make sure the result of the first dynamic_alloc isn't copied back to sp more diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll index 810bfb79020..f3046e1fcb8 100644 --- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll +++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -arm-atomic-cfg-tidy=0 | FileCheck %s ; rdar://8115404 ; Tail merging must not split an IT block. diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll index 75f5439b98c..3d89390d04c 100644 --- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll +++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll @@ -1,5 +1,5 @@ ; rdar://8465407 -; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s %struct.buf = type opaque diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll index b1ce3bb935e..240df83252c 100644 --- a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll +++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv7-apple-darwin10 < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-darwin10 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s %struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 } diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll index 8a3c895bbe5..16e2298522f 100644 --- a/test/CodeGen/Thumb2/buildvector-crash.ll +++ b/test/CodeGen/Thumb2/buildvector-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -mcpu=cortex-a8 | FileCheck %s ; Formerly crashed, 3573915. define void @RotateStarsFP_Vec() nounwind { diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index a9f948cf717..88c7f0f17ab 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s define void @fht(float* nocapture %fz, i16 signext %n) nounwind { ; CHECK-LABEL: fht: diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 36544d16d6f..d20eef0c8bb 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s ; rdar://7352504 ; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]". diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll index a00b22d8502..332ed50ede6 100644 --- a/test/CodeGen/Thumb2/thumb2-branch.ll +++ b/test/CodeGen/Thumb2/thumb2-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -arm-atomic-cfg-tidy=0 | FileCheck %s ; If-conversion defeats the purpose of this test, which is to check ; conditional branch generation, so a call to make sure it doesn't ; happen and we get actual branches. diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll index 893bd0fdaef..f0f79168c90 100644 --- a/test/CodeGen/Thumb2/thumb2-cbnz.ll +++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 | FileCheck %s ; rdar://7354379 declare double @foo(double) nounwind readnone diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll index 403cd48035b..a861912fe11 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-default-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-no-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 -arm-default-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-atomic-cfg-tidy=0 -arm-no-restrict-it | FileCheck %s define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll index a71aa3fb613..79667d43b95 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-no-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 -arm-default-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-atomic-cfg-tidy=0 -arm-no-restrict-it | FileCheck %s ; There shouldn't be a unconditional branch at end of bb52. ; rdar://7184787 diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 52c10634491..94f472593b3 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s ; PR4789 %bar = type { float, float, float } diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll index 4dca24629b0..a028deebc8e 100644 --- a/test/CodeGen/Thumb2/v8_IT_3.ll +++ b/test/CodeGen/Thumb2/v8_IT_3.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s -; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC -; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC +; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC +; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC %struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* } %struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 } diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll index 2f352d6d5f1..2da75ad2143 100644 --- a/test/CodeGen/Thumb2/v8_IT_5.ll +++ b/test/CodeGen/Thumb2/v8_IT_5.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s ; CHECK: it ne ; CHECK-NEXT: cmpne ; CHECK-NEXT: bne [[JUMPTARGET:.LBB[0-9]+_[0-9]+]] diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll index 756ea82f37f..1d56ddea244 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll +++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s +; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - -arm-atomic-cfg-tidy=0 | FileCheck %s ; ; LSR should only check for valid address modes when the IV user is a ; memory address.