From: Chad Rosier Date: Wed, 30 Sep 2015 19:44:40 +0000 (+0000) Subject: [AArch64] Remove an unnecessary restriction on pre-index instructions. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=81122714a34e8407cea088fd33db496c14612f20;p=oota-llvm.git [AArch64] Remove an unnecessary restriction on pre-index instructions. Previously, the index was constrained to the size of the memory operation for no apparent reason. This change removes that constraint so that we can form pre-index instructions with any valid offset. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248931 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 1e73328d3a2..de3930cd0ce 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -959,7 +959,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); int Offset = getLdStOffsetOp(MemMI).getImm(); - unsigned MemSize = getMemScale(MemMI); // If the load/store is the first instruction in the block, there's obviously // not any matching update. Ditto if the memory offset isn't zero. @@ -991,7 +990,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( ++Count; // If we found a match, return it. - if (isMatchingUpdateInsn(I, MI, BaseReg, MemSize)) + if (isMatchingUpdateInsn(I, MI, BaseReg, Offset)) return MBBI; // Update the status of what the instruction clobbered and used. 
diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll index f27570acc82..e77952e4b8a 100644 --- a/test/CodeGen/AArch64/arm64-aapcs-be.ll +++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll @@ -32,7 +32,7 @@ define float @test_block_addr([8 x float], [1 x float] %in) { define void @test_block_addr_callee() { ; CHECK-LABEL: test_block_addr_callee: -; CHECK: str {{[a-z0-9]+}}, [sp] +; CHECK: str {{[a-z0-9]+}}, [sp, #-16]! ; CHECK: bl test_block_addr %val = insertvalue [1 x float] undef, float 0.0, 0 call float @test_block_addr([8 x float] undef, [1 x float] %val) diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll index 1c1b58b8b14..dc9884f12f5 100644 --- a/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/test/CodeGen/AArch64/arm64-abi_align.ll @@ -508,7 +508,7 @@ entry: ; "i64 %0" should be in register x7. ; "i32 8" should be on stack at [sp]. ; CHECK: ldr x7, [{{x[0-9]+}}] -; CHECK: str {{w[0-9]+}}, [sp] +; CHECK: str {{w[0-9]+}}, [sp, #-16]! ; FAST-LABEL: i64_split ; FAST: ldr x7, [{{x[0-9]+}}] ; FAST: mov x[[R0:[0-9]+]], sp diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll index dee03448354..c65cf95be2e 100644 --- a/test/CodeGen/AArch64/arm64-join-reserved.ll +++ b/test/CodeGen/AArch64/arm64-join-reserved.ll @@ -5,7 +5,7 @@ target triple = "arm64-apple-macosx10" ; A move isn't necessary. ; ; CHECK-LABEL: g: -; CHECK: str xzr, [sp] +; CHECK: str xzr, [sp, #-16]! 
; CHECK: bl ; CHECK: ret define void @g() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll index b8236c5b247..c2006ccdd06 100644 --- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll +++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll @@ -7,7 +7,7 @@ define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen: ; CHECK: Ltmp -; CHECK: str x{{.+}}, [sp] +; CHECK: str x{{.+}}, [sp, #-16]! ; CHECK-NEXT: mov x0, x{{.+}} ; CHECK: Ltmp ; CHECK-NEXT: movz x16, #0xffff, lsl #32 @@ -16,7 +16,7 @@ entry: ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen: ; FAST: Ltmp -; FAST: str x{{.+}}, [sp] +; FAST: str x{{.+}}, [sp, #-16]! ; FAST: Ltmp ; FAST-NEXT: movz x16, #0xffff, lsl #32 ; FAST-NEXT: movk x16, #0xdead, lsl #16 @@ -50,7 +50,7 @@ entry: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 ; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 -; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: str [[REG1]], [sp, #-32]! ; FAST-NEXT: str [[REG2]], [sp, #16] ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST: Ltmp @@ -90,7 +90,7 @@ entry: ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 ; FAST-NEXT: movz [[REG5:x[0-9]+]], #0xa -; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: str [[REG1]], [sp, #-64]! ; FAST-NEXT: str [[REG2]], [sp, #16] ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST-NEXT: str [[REG4]], [sp, #36] diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index a392619a768..b5e03f08280 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -16,7 +16,7 @@ define fastcc void @foo(i32 %in) { ; CHECK: mov x29, sp ; Reserve space for call-frame: -; CHECK: sub sp, sp, #16 +; CHECK: str w{{[0-9]+}}, [sp, #-16]! 
call fastcc void @will_pop([8 x i32] undef, i32 42) ; CHECK: bl will_pop @@ -42,7 +42,7 @@ define void @foo1(i32 %in) { ; CHECK: mov x29, sp ; Reserve space for call-frame -; CHECK: sub sp, sp, #16 +; CHECK: str w{{[0-9]+}}, [sp, #-16]! call void @wont_pop([8 x i32] undef, i32 42) ; CHECK: bl wont_pop diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index 9917fcd044f..f021eb23261 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -7,12 +7,12 @@ define fastcc void @func_stack0() { ; CHECK-LABEL: func_stack0: ; CHECK: mov x29, sp -; CHECK-NEXT: sub sp, sp, #32 +; CHECK: str w{{[0-9]+}}, [sp, #-32]! ; CHECK-TAIL-LABEL: func_stack0: ; CHECK-TAIL: stp x29, x30, [sp, #-16]! ; CHECK-TAIL-NEXT: mov x29, sp -; CHECK-TAIL-NEXT: sub sp, sp, #32 +; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]! call fastcc void @func_stack8([8 x i32] undef, i32 42) @@ -55,13 +55,13 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-LABEL: func_stack8: ; CHECK: stp x29, x30, [sp, #-16]! ; CHECK: mov x29, sp -; CHECK: sub sp, sp, #32 +; CHECK: str w{{[0-9]+}}, [sp, #-32]! ; CHECK-TAIL-LABEL: func_stack8: ; CHECK-TAIL: stp x29, x30, [sp, #-16]! ; CHECK-TAIL: mov x29, sp -; CHECK-TAIL: sub sp, sp, #32 +; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]! call fastcc void @func_stack8([8 x i32] undef, i32 42) diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 22a33157fd5..2f45666ba13 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -89,11 +89,11 @@ define void @check_stack_args() { ; that varstruct is passed on the stack. Rather dependent on how a ; memcpy gets created, but the following works for now. 
-; CHECK-DAG: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #-16]! ; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 ; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b -; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] +; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp, #-16]! ; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 ; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index bd0567c28b2..d2133213f18 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -276,11 +276,11 @@ bar: ; ; with X being either w0, x0, s0, d0 or q0. -%pre.struct.i32 = type { i32, i32, i32} -%pre.struct.i64 = type { i32, i64, i64} -%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>} -%pre.struct.float = type { i32, float, float} -%pre.struct.double = type { i32, double, double} +%pre.struct.i32 = type { i32, i32, i32, i32, i32} +%pre.struct.i64 = type { i32, i64, i64, i64, i64} +%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>} +%pre.struct.float = type { i32, float, float, float} +%pre.struct.double = type { i32, double, double, double} define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, %pre.struct.i32* %load2) nounwind { @@ -372,6 +372,96 @@ return: ret double %ret } +define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-word3 +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32*, %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i32, i32* %retptr + ret i32 %ret +} + +define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-doubleword3 +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64*, %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i64, i64* %retptr + ret i64 %ret +} + +define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-quadword3 +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128*, %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load <2 x i64>, <2 x i64>* %retptr + ret <2 x i64> %ret +} + +define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-float3 +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float*, %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load float, float* %retptr + ret float %ret +} + +define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-double3 +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double*, %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load double, double* %retptr + ret double %ret +} + ; Check the following transform: ; ; add x8, x8, #16 @@ -477,6 +567,101 @@ return: ret void } +define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2, + i32 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-word3 +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32*, %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i32 %val, i32* %retptr + ret void +} + +define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2, + i64 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-doubleword3 +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64*, %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i64 %val, i64* %retptr + ret void +} + +define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2, + <2 x i64> %val) nounwind { +; CHECK-LABEL: store-pre-indexed-quadword3 +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128*, %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + store <2 x i64> %val, <2 x i64>* %retptr + ret void +} + +define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2, + float %val) nounwind { +; CHECK-LABEL: store-pre-indexed-float3 +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float*, %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + store float %val, float* %retptr + ret void +} + +define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2, + double %val) nounwind { +; CHECK-LABEL: store-pre-indexed-double3 +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double*, %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + store double %val, double* %retptr + ret void +} + ; Check the following transform: ; ; ldr X, [x20]