From: Chris Lattner <sabre@nondot.org>
Date: Wed, 6 Oct 2004 05:01:07 +0000 (+0000)
Subject: Codegen signed mod by 2 or -2 more efficiently.  Instead of generating:
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2483f679143c4d647a766b6b1ad4f82baf4e6e8e;p=oota-llvm.git

Codegen signed mod by 2 or -2 more efficiently.  Instead of generating:

t:
        mov %EDX, DWORD PTR [%ESP + 4]
        mov %ECX, 2
        mov %EAX, %EDX
        sar %EDX, 31
        idiv %ECX
        mov %EAX, %EDX
        ret

Generate:
t:
        mov %ECX, DWORD PTR [%ESP + 4]
***     mov %EAX, %ECX
        cdq
        and %ECX, 1
        xor %ECX, %EDX
        sub %ECX, %EDX
***     mov %EAX, %ECX
        ret

Note that the two moves marked with *** are redundant; the register allocator
should eliminate them, but currently does not.

Compare this to GCC, which generates:

t:
        mov     %eax, DWORD PTR [%esp+4]
        mov     %edx, %eax
        shr     %edx, 31
        lea     %ecx, [%edx+%eax]
        and     %ecx, -2
        sub     %eax, %ecx
        ret

or ICC 8.0, which generates:

t:
        movl      4(%esp), %ecx                                 #3.5
        movl      $-2147483647, %eax                            #3.25
        imull     %ecx                                          #3.25
        movl      %ecx, %eax                                    #3.25
        sarl      $31, %eax                                     #3.25
        addl      %ecx, %edx                                    #3.25
        subl      %edx, %eax                                    #3.25
        addl      %eax, %eax                                    #3.25
        negl      %eax                                          #3.25
        subl      %eax, %ecx                                    #3.25
        movl      %ecx, %eax                                    #3.25
        ret                                                     #3.25

We would be in great shape if not for the two redundant moves in our output.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@16763 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index 3add1c1ad1f..d92a732dee3 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -2672,14 +2672,14 @@ void X86ISel::emitDivRemOperation(MachineBasicBlock *BB,
   }
 
   static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
-  static const unsigned NEGOpcode[] = { X86::NEG8r, X86::NEG16r, X86::NEG32r };
+  static const unsigned NEGOpcode[]={ X86::NEG8r,  X86::NEG16r,  X86::NEG32r };
   static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
   static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri };
   static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
 
   // Special case signed division by power of 2.
-  if (isDiv)
-    if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) {
+  if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1))
+    if (isDiv) {
       assert(Class != cLong && "This doesn't handle 64-bit divides!");
       int V = CI->getValue();
 
@@ -2742,6 +2742,42 @@ void X86ISel::emitDivRemOperation(MachineBasicBlock *BB,
           BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4);
         return;
       }
+    } else {    // X % C
+      assert(Class != cLong && "This doesn't handle 64-bit remainder!");
+      int V = CI->getValue();
+
+      if (V == 2 || V == -2) {       // X % 2, X % -2
+        std::cerr << "SREM 2\n";
+        static const unsigned SExtOpcode[] = { X86::CBW, X86::CWD, X86::CDQ };
+        static const unsigned BaseReg[]    = { X86::AL , X86::AX , X86::EAX };
+        static const unsigned SExtReg[]    = { X86::AH , X86::DX , X86::EDX };
+        static const unsigned ANDOpcode[]  = {
+          X86::AND8ri, X86::AND16ri, X86::AND32ri
+        };
+        static const unsigned XOROpcode[]  = {
+          X86::XOR8rr, X86::XOR16rr, X86::XOR32rr
+        };
+        static const unsigned SUBOpcode[]  = {
+          X86::SUB8rr, X86::SUB16rr, X86::SUB32rr
+        };
+
+        // Sign extend result into reg of -1 or 0.
+        unsigned Op0Reg = getReg(Op0, BB, IP);
+        BuildMI(*BB, IP, MovOpcode[Class], 1, BaseReg[Class]).addReg(Op0Reg);
+        BuildMI(*BB, IP, SExtOpcode[Class], 0);
+        unsigned TmpReg0 = makeAnotherReg(Op0->getType());
+        BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg0).addReg(SExtReg[Class]);
+
+        unsigned TmpReg1 = makeAnotherReg(Op0->getType());
+        BuildMI(*BB, IP, ANDOpcode[Class], 2, TmpReg1).addReg(Op0Reg).addImm(1);
+        
+        unsigned TmpReg2 = makeAnotherReg(Op0->getType());
+        BuildMI(*BB, IP, XOROpcode[Class], 2,
+                TmpReg2).addReg(TmpReg1).addReg(TmpReg0);
+        BuildMI(*BB, IP, SUBOpcode[Class], 2,
+                ResultReg).addReg(TmpReg2).addReg(TmpReg0);
+        return;
+      }
     }
 
   static const unsigned Regs[]     ={ X86::AL    , X86::AX     , X86::EAX     };