From 0e29ed081b24359978916b997e91e3e1e2293915 Mon Sep 17 00:00:00 2001 From: Stuart Hastings Date: Fri, 20 May 2011 19:04:40 +0000 Subject: [PATCH] Re-commit 131641 with fixes; de-pseudoize MOVSX16rr8 and friends. rdar://problem/8614450 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 6 ++--- lib/Target/X86/X86InstrCompiler.td | 31 ++++++++++++++++------- lib/Target/X86/X86InstrExtension.td | 39 +++++++---------------------- lib/Target/X86/X86InstrInfo.td | 8 +++--- lib/Target/X86/X86MCInstLower.cpp | 4 --- test/CodeGen/X86/div8.ll | 22 ++++++++++++++++ 6 files changed, 60 insertions(+), 50 deletions(-) create mode 100644 test/CodeGen/X86/div8.ll diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 89d5f7b8633..1fcc274e0f8 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2006,17 +2006,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = - SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, + SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, MVT::Other, Ops, array_lengthof(Ops)), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { Move = - SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); + SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0); Chain = CurDAG->getEntryNode(); } - Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); + Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue()); InFlag = Chain.getValue(1); } else { InFlag = diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 998dfcccf2e..0c70a0f9679 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -997,7 +997,8 @@ def : Pat<(extloadi64i32 addr:$src), // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. -def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; +def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG + (MOVZX32rr8 GR8 :$src), sub_16bit)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. @@ -1164,9 +1165,9 @@ def : Pat<(and GR32:$src1, 0xff), Requires<[In32BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, - GR16_ABCD)), - sub_8bit))>, + (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG + (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)), + sub_16bit)>, Requires<[In32BitMode]>; // r & (2^32-1) ==> movz @@ -1184,7 +1185,8 @@ def : Pat<(and GR32:$src1, 0xff), Requires<[In64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, + (EXTRACT_SUBREG (MOVZX32rr8 (i8 + (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>, Requires<[In64BitMode]>; @@ -1196,10 +1198,11 @@ def : Pat<(sext_inreg GR32:$src, i8), GR32_ABCD)), sub_8bit))>, Requires<[In32BitMode]>; + def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit))>, + (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG + (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))), + sub_16bit)>, Requires<[In32BitMode]>; def : Pat<(sext_inreg GR64:$src, i32), @@ -1212,9 +1215,19 @@ def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, Requires<[In64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, + (EXTRACT_SUBREG (MOVSX32rr8 + (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>, Requires<[In64BitMode]>; +// sext, sext_load, zext, zext_load +def: Pat<(i16 (sext GR8:$src)), + (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>; +def: Pat<(sextloadi16i8 addr:$src), + (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>; +def: Pat<(i16 (zext GR8:$src)), + (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>; +def: Pat<(zextloadi16i8 addr:$src), + (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 867c0f8b684..2e1d523ea16 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -38,22 +38,11 @@ let neverHasSideEffects = 1 in { // Sign/Zero extenders -// Use movsbl intead of movsbw; we don't care about the high 16 bits -// of the register here. This has a smaller encoding and avoids a -// partial-register update. Actual movsbw included for the disassembler. -def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; - -// FIXME: Use a pat pattern or define a syntax here. -let isCodeGenOnly=1 in { -def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "", [(set GR16:$dst, (sext GR8:$src))]>, TB; -def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; -} -def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), +def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), + "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), + "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))]>, TB; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), @@ -66,20 +55,10 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB; -// Use movzbl intead of movzbw; we don't care about the high 16 bits -// of the register here. This has a smaller encoding and avoids a -// partial-register update. Actual movzbw included for the disassembler. -def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -// FIXME: Use a pat pattern or define a syntax here. -let isCodeGenOnly=1 in { -def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "", [(set GR16:$dst, (zext GR8:$src))]>, TB; -def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; -} +def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), + "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), + "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR8:$src))]>, TB; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 03a0b0c3aed..3c37bc0a7d9 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1546,8 +1546,8 @@ def : InstAlias<"movq $src, $dst", def : InstAlias<"movsd", (MOVSD)>; // movsx aliases -def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>; -def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>; def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; @@ -1555,8 +1555,8 @@ def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; // movzx aliases -def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>; -def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>; def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index cbe6db26e5b..793156ffce8 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -355,10 +355,6 @@ ReSimplify: assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && "LEA has segment specified!"); break; - case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; - case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; - case X86::MOVSX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break; - case X86::MOVSX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break; case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break; case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break; case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break; diff --git a/test/CodeGen/X86/div8.ll b/test/CodeGen/X86/div8.ll new file mode 100644 index 00000000000..0825f79e324 --- /dev/null +++ b/test/CodeGen/X86/div8.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = '8div.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.6.6" + +define signext i8 @test_div(i8 %dividend, i8 %divisor) nounwind ssp { +entry: + %dividend.addr = alloca i8, align 2 + %divisor.addr = alloca i8, align 1 + %quotient = alloca i8, align 1 + store i8 %dividend, i8* %dividend.addr, align 2 + store i8 %divisor, i8* %divisor.addr, align 1 + %tmp = load i8* %dividend.addr, align 2 + %tmp1 = load i8* %divisor.addr, align 1 +; Insist on i8->i32 zero extension, even though divb demands only i16: +; CHECK: movzbl {{.*}}%eax +; CHECK: divb + %div = udiv i8 %tmp, %tmp1 + store i8 %div, i8* %quotient, align 1 + %tmp4 = load i8* %quotient, align 1 + ret i8 %tmp4 +} -- 2.34.1