From: Quentin Colombet Date: Wed, 1 Jul 2015 23:12:13 +0000 (+0000) Subject: [TwoAddressInstructionPass] Try 3 Addr Conversion After Commuting. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a1a323c6376719271f7f1506bd38062189328ef6;p=oota-llvm.git [TwoAddressInstructionPass] Try 3 Addr Conversion After Commuting. TwoAddressInstructionPass stops after a successful commute, but 3 Addr conversion might be better in some cases. Consider: int foo(int a, int b) { return a + b; } Before this commit, we emit: addl %esi, %edi movl %edi, %eax ret After this commit, we try 3 Addr conversion: leal (%rsi,%rdi), %eax ret Patch by Volkan Keles! Differential Revision: http://reviews.llvm.org/D10851 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241206 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 6bceccca778..e84bea63995 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1207,12 +1207,24 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // If the instruction is convertible to 3 Addr, instead + // of returning try 3 Addr transformation aggresively and + // use this variable to check later. Because it might be better. + // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` + // instead of the following code. + // addl %esi, %edi + // movl %edi, %eax + // ret + bool commuted = false; + // If it's profitable to commute, try to do so. if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { + commuted = true; ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; - return false; + if (!MI.isConvertibleTo3Addr()) + return false; } if (shouldOnlyCommute) @@ -1220,7 +1232,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. 
- if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { + if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1237,6 +1249,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // Return if it is commuted but 3 addr conversion is failed. + if (commuted) + return false; + // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) { diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll index 656c385e2bc..5b01e2f4e90 100644 --- a/test/CodeGen/X86/commute-two-addr.ll +++ b/test/CodeGen/X86/commute-two-addr.ll @@ -39,7 +39,7 @@ define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 entry: ; DARWIN-LABEL: t3: ; DARWIN: shlq $32, %rcx -; DARWIN-NEXT: orq %rcx, %rax +; DARWIN-NEXT: leaq (%rax,%rcx), %rax ; DARWIN-NEXT: shll $8 ; DARWIN-NOT: leaq %tmp21 = zext i32 %lb to i64 diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll index b5ca0275d8d..5779cf33ac8 100644 --- a/test/CodeGen/X86/twoaddr-lea.ll +++ b/test/CodeGen/X86/twoaddr-lea.ll @@ -25,8 +25,7 @@ define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind { entry: ; CHECK-LABEL: test2: ; CHECK: leal -; CHECK-NOT: leal -; CHECK-NOT: mov +; CHECK-NEXT: addl ; CHECK-NEXT: addl ; CHECK-NEXT: ret %add = add i32 %b, %a diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll index 9718c86300c..a0b552d4d58 100644 --- a/test/CodeGen/X86/win64_params.ll +++ b/test/CodeGen/X86/win64_params.ll @@ -7,8 +7,7 @@ define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind re entry: ; CHECK: movl 48(%rsp), %eax ; CHECK: addl 40(%rsp), %eax -; LINUX: addl %r9d, %r8d -; LINUX: movl %r8d, %eax +; LINUX: leal (%r8,%r9), %eax %add = add nsw i32 %p6, %p5 ret i32 %add } @@ -27,10 +26,8 @@ 
entry: ; on other platforms here (note the x86_64_sysvcc calling convention). define x86_64_sysvcc i32 @f8(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize { entry: -; CHECK: addl %r9d, %r8d -; CHECK: movl %r8d, %eax -; LINUX: addl %r9d, %r8d -; LINUX: movl %r8d, %eax +; CHECK: leal (%r8,%r9), %eax +; LINUX: leal (%r8,%r9), %eax %add = add nsw i32 %p6, %p5 ret i32 %add } diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll index 7925bf01020..24be0dc42d6 100644 --- a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll +++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll @@ -23,7 +23,7 @@ ; X32: add ; X32: add ; X32: add -; X32: add +; X32: leal ; X32: %for.body.3 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { entry: