From b619dd5d5b69ba9f4571a96e1a96d09d8aed03a7 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Fri, 12 Jul 2013 18:06:44 +0000
Subject: [PATCH] X86: Shrink certain forms of movsx.

In particular:
movsbw %al, %ax   --> cbtw
movswl %ax, %eax  --> cwtl
movslq %eax, %rax --> cltq

According to Intel's manual those have the same performance characteristics but
come with a smaller encoding.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186174 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86MCInstLower.cpp     | 35 +++++++++++++++++++++++++++
 test/CodeGen/X86/fast-isel-ret-ext.ll |  2 +-
 test/CodeGen/X86/mcinst-lowering.ll   | 18 ++++++++++++++
 test/CodeGen/X86/shl_elim.ll          |  9 ++++---
 test/CodeGen/X86/widen_conv-2.ll      |  4 +--
 5 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 3fea4cb560e..c7c00b536ce 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -254,6 +254,34 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
   Inst.addOperand(Saved);
 }
 
+/// \brief Shrink a movsx of AL->AX, AX->EAX or EAX->RAX to the operand-free
+/// CBW, CWDE or CDQE encoding; any other register pair is left untouched.
+static void SimplifyMOVSX(MCInst &Inst) {
+  unsigned NewOpcode = 0; // 0 means "no shorter form applies".
+  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
+  switch (Inst.getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected instruction!");
+  case X86::MOVSX16rr8:  // movsbw %al, %ax   --> cbtw
+    if (Op0 == X86::AX && Op1 == X86::AL)
+      NewOpcode = X86::CBW;
+    break;
+  case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
+    if (Op0 == X86::EAX && Op1 == X86::AX)
+      NewOpcode = X86::CWDE;
+    break;
+  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
+    if (Op0 == X86::RAX && Op1 == X86::EAX)
+      NewOpcode = X86::CDQE;
+    break;
+  }
+
+  if (NewOpcode != 0) {
+    Inst = MCInst(); // Start over from an empty instruction (no operands).
+    Inst.setOpcode(NewOpcode);
+  }
+}
+
 /// \brief Simplify things like MOV32rm to MOV32o32a.
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, unsigned Opcode) { @@ -557,6 +585,13 @@ ReSimplify: case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break; case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break; + // Try to shrink some forms of movsx. + case X86::MOVSX16rr8: + case X86::MOVSX32rr16: + case X86::MOVSX64rr32: + SimplifyMOVSX(OutMI); + break; + case X86::MORESTACK_RET: OutMI.setOpcode(X86::RET); break; diff --git a/test/CodeGen/X86/fast-isel-ret-ext.ll b/test/CodeGen/X86/fast-isel-ret-ext.ll index fd768cb9b33..395ad1e616a 100644 --- a/test/CodeGen/X86/fast-isel-ret-ext.ll +++ b/test/CodeGen/X86/fast-isel-ret-ext.ll @@ -26,7 +26,7 @@ define signext i16 @test4(i32 %y) nounwind { %conv = trunc i32 %y to i16 ret i16 %conv ; CHECK: test4: - ; CHECK: movswl {{.*}}, %eax + ; CHECK: {{(movswl.%.x, %eax|cwtl)}} } define zeroext i1 @test5(i32 %y) nounwind { diff --git a/test/CodeGen/X86/mcinst-lowering.ll b/test/CodeGen/X86/mcinst-lowering.ll index 1ef5a971bab..391f9398f0b 100644 --- a/test/CodeGen/X86/mcinst-lowering.ll +++ b/test/CodeGen/X86/mcinst-lowering.ll @@ -24,3 +24,21 @@ if.end: ; preds = %entry return: ; preds = %entry ret i32 0 } + +define i32 @f1() nounwind { + %ax = tail call i16 asm sideeffect "", "={ax},~{dirflag},~{fpsr},~{flags}"() + %conv = sext i16 %ax to i32 + ret i32 %conv + +; CHECK: f1: +; CHECK: cwtl ## encoding: [0x98] +} + +define i64 @f2() nounwind { + %eax = tail call i32 asm sideeffect "", "={ax},~{dirflag},~{fpsr},~{flags}"() + %conv = sext i32 %eax to i64 + ret i64 %conv + +; CHECK: f2: +; CHECK: cltq ## encoding: [0x48,0x98] +} diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll index 83e1eb5c39e..e99ecac8723 100644 --- a/test/CodeGen/X86/shl_elim.ll +++ b/test/CodeGen/X86/shl_elim.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 | grep "movl 8(.esp), %eax" -; RUN: llc < %s -march=x86 | grep "shrl .eax" -; RUN: llc < %s -march=x86 | grep "movswl .ax, .eax" 
+; RUN: llc < %s -march=x86 | FileCheck %s define i32 @test1(i64 %a) nounwind { %tmp29 = lshr i64 %a, 24 ; [#uses=1] @@ -9,5 +7,10 @@ define i32 @test1(i64 %a) nounwind { %tmp45 = trunc i32 %tmp410 to i16 ; [#uses=1] %tmp456 = sext i16 %tmp45 to i32 ; [#uses=1] ret i32 %tmp456 + +; CHECK: test1: +; CHECK: movl 8(%esp), %eax +; CHECK: shrl %eax +; CHECK: cwtl } diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll index 969cb512beb..db8fa93dc79 100644 --- a/test/CodeGen/X86/widen_conv-2.ll +++ b/test/CodeGen/X86/widen_conv-2.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -; CHECK: movswl -; CHECK: movswl +; CHECK: cwtl +; CHECK: cwtl ; sign extension v2i32 to v2i16 -- 2.34.1