From fdc5012e06efa540dabcbaecf448a7baa338271f Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Tue, 11 Aug 2015 08:19:43 +0000 Subject: [PATCH] Allow PeepholeOptimizer to fold a few more cases The condition for clearing the folding candidate list was clamped together with the "uninteresting instruction" condition. This is too conservative, e.g. we don't need to clear the list when encountering an IMPLICIT_DEF. Differential Revision: http://reviews.llvm.org/D11591 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244577 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PeepholeOptimizer.cpp | 9 ++++----- test/CodeGen/X86/avx-cvt.ll | 3 +-- test/CodeGen/X86/shift-bmi2.ll | 20 +++++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 21d603b19ef..3dc73bac5b8 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -1236,14 +1236,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // If there exists an instruction which belongs to the following // categories, we will discard the load candidates. + if (MI->mayStore() || MI->isCall() || MI->hasUnmodeledSideEffects()) + FoldAsLoadDefCandidates.clear(); + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || - MI->hasUnmodeledSideEffects()) { - FoldAsLoadDefCandidates.clear(); + MI->hasUnmodeledSideEffects()) continue; - } - if (MI->mayStore() || MI->isCall()) - FoldAsLoadDefCandidates.clear(); if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll index 6df3e5324c1..2f039862b36 100644 --- a/test/CodeGen/X86/avx-cvt.ll +++ b/test/CodeGen/X86/avx-cvt.ll @@ -113,8 +113,7 @@ define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp { define void @fpext() nounwind uwtable { ; CHECK-LABEL: fpext: ; CHECK: # BB#0: -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 ; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq %f = alloca float, align 4 diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll index 63b6ec55fac..fdeddffdfb0 100644 --- a/test/CodeGen/X86/shift-bmi2.ll +++ b/test/CodeGen/X86/shift-bmi2.ll @@ -30,11 +30,10 @@ entry: %x = load i32, i32* %p %shl = shl i32 %x, %shamt ; BMI2: shl32p -; Source order scheduling prevents folding, rdar:14208996. -; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI2: ret ; BMI264: shl32p -; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i32 %shl } @@ -75,7 +74,7 @@ entry: %x = load i64, i64* %p %shl = shl i64 %x, %shamt ; BMI264: shl64p -; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i64 %shl } @@ -107,11 +106,10 @@ entry: %x = load i32, i32* %p %shl = lshr i32 %x, %shamt ; BMI2: lshr32p -; Source order scheduling prevents folding, rdar:14208996. -; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI2: ret ; BMI264: lshr32p -; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i32 %shl } @@ -130,7 +128,7 @@ entry: %x = load i64, i64* %p %shl = lshr i64 %x, %shamt ; BMI264: lshr64p -; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i64 %shl } @@ -153,10 +151,10 @@ entry: %shl = ashr i32 %x, %shamt ; BMI2: ashr32p ; Source order scheduling prevents folding, rdar:14208996. -; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI2: ret ; BMI264: ashr32p -; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i32 %shl } @@ -175,7 +173,7 @@ entry: %x = load i64, i64* %p %shl = ashr i64 %x, %shamt ; BMI264: ashr64p -; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}} +; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} ; BMI264: ret ret i64 %shl } -- 2.34.1