Add support for the rep movs[bwd] instructions, and emit them when code generating the llvm.memcpy intrinsic.

author Chris Lattner <sabre@nondot.org>

Thu, 12 Feb 2004 17:53:22 +0000 (17:53 +0000)

committer Chris Lattner <sabre@nondot.org>

Thu, 12 Feb 2004 17:53:22 +0000 (17:53 +0000)
author Chris Lattner <sabre@nondot.org>
Thu, 12 Feb 2004 17:53:22 +0000 (17:53 +0000)
committer Chris Lattner <sabre@nondot.org>
Thu, 12 Feb 2004 17:53:22 +0000 (17:53 +0000)
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp

index 9df7697344d64c5e7bfac99ac3376d6aff2746a4..67849e2edbb2940e9902c39ad921837e7f8c44c0 100644 (file)
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -1157,6 +1157,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
            case Intrinsic::va_start:
            case Intrinsic::va_copy:
            case Intrinsic::va_end:
+          case Intrinsic::memcpy:
              // We directly implement these intrinsics
              break;
            default:
@@ -1188,6 +1189,58 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
      return;
    case Intrinsic::va_end: return;   // Noop on X86
  
+  case Intrinsic::memcpy: {
+    assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
+    unsigned Align = 1;
+    if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+      Align = AlignC->getRawValue();
+      if (Align == 0) Align = 1;
+    }
+
+    // Turn the byte count into # iterations
+    unsigned ByteReg = getReg(CI.getOperand(3));
+    unsigned CountReg;
+    
+    switch (Align & 3) {
+    case 2:   // WORD aligned
+      CountReg = makeAnotherReg(Type::IntTy);
+      BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
+      break;
+    case 0:   // DWORD aligned
+      CountReg = makeAnotherReg(Type::IntTy);
+      BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
+      break;
+    case 1:   // BYTE aligned
+    case 3:   // BYTE aligned
+      CountReg = ByteReg;
+      break;
+    }
+
+    // No matter what the alignment is, we put the source in ESI, the
+    // destination in EDI, and the count in ECX.
+    TmpReg1 = getReg(CI.getOperand(1));
+    TmpReg2 = getReg(CI.getOperand(2));
+    BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
+    BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
+    BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
+
+    unsigned Bytes = getReg(CI.getOperand(3));
+    switch (Align & 3) {
+    case 1:   // BYTE aligned
+    case 3:   // BYTE aligned
+      BuildMI(BB, X86::REP_MOVSB, 0);
+      break;
+    case 2:   // WORD aligned
+      BuildMI(BB, X86::REP_MOVSW, 0);
+      break;
+    case 0:   // DWORD aligned
+      BuildMI(BB, X86::REP_MOVSD, 0);
+      break;
+    }
+
+    return;
+  }
+
    default: assert(0 && "Error: unknown intrinsics should have been lowered!");
    }
  }
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp

index be6319cd0c5f4081c44faec0d77e712e10a46de3..83e5e102d59751a443f96ea24379866a4ed9e0b9 100644 (file)
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -470,6 +470,9 @@ void Emitter::emitInstruction(MachineInstr &MI) {
    unsigned Opcode = MI.getOpcode();
    const TargetInstrDescriptor &Desc = II->get(Opcode);
  
+  // Emit the repeat opcode prefix as needed.
+  if ((Desc.TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+
    // Emit instruction prefixes if necessary
    if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);// Operand size...
  
@@ -477,6 +480,7 @@ void Emitter::emitInstruction(MachineInstr &MI) {
    case X86II::TB:
      MCE.emitByte(0x0F);   // Two-byte opcode prefix
      break;
+  case X86II::REP: break; // already handled.
    case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
    case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
      MCE.emitByte(0xD8+
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp

index 9df7697344d64c5e7bfac99ac3376d6aff2746a4..67849e2edbb2940e9902c39ad921837e7f8c44c0 100644 (file)
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -1157,6 +1157,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
            case Intrinsic::va_start:
            case Intrinsic::va_copy:
            case Intrinsic::va_end:
+          case Intrinsic::memcpy:
              // We directly implement these intrinsics
              break;
            default:
@@ -1188,6 +1189,58 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
      return;
    case Intrinsic::va_end: return;   // Noop on X86
  
+  case Intrinsic::memcpy: {
+    assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
+    unsigned Align = 1;
+    if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+      Align = AlignC->getRawValue();
+      if (Align == 0) Align = 1;
+    }
+
+    // Turn the byte count into # iterations
+    unsigned ByteReg = getReg(CI.getOperand(3));
+    unsigned CountReg;
+    
+    switch (Align & 3) {
+    case 2:   // WORD aligned
+      CountReg = makeAnotherReg(Type::IntTy);
+      BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
+      break;
+    case 0:   // DWORD aligned
+      CountReg = makeAnotherReg(Type::IntTy);
+      BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
+      break;
+    case 1:   // BYTE aligned
+    case 3:   // BYTE aligned
+      CountReg = ByteReg;
+      break;
+    }
+
+    // No matter what the alignment is, we put the source in ESI, the
+    // destination in EDI, and the count in ECX.
+    TmpReg1 = getReg(CI.getOperand(1));
+    TmpReg2 = getReg(CI.getOperand(2));
+    BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
+    BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
+    BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
+
+    unsigned Bytes = getReg(CI.getOperand(3));
+    switch (Align & 3) {
+    case 1:   // BYTE aligned
+    case 3:   // BYTE aligned
+      BuildMI(BB, X86::REP_MOVSB, 0);
+      break;
+    case 2:   // WORD aligned
+      BuildMI(BB, X86::REP_MOVSW, 0);
+      break;
+    case 0:   // DWORD aligned
+      BuildMI(BB, X86::REP_MOVSD, 0);
+      break;
+    }
+
+    return;
+  }
+
    default: assert(0 && "Error: unknown intrinsics should have been lowered!");
    }
  }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h

index 98f9fe68d72404e39ca59f3e6f9686447390d148..c6e3b761766a905b189b739d821de17558918aba 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -86,9 +86,9 @@ namespace X86II {
      OpSize      = 1 << 5,
  
      // Op0Mask - There are several prefix bytes that are used to form two byte
-    // opcodes.  These are currently 0x0F, and 0xD8-0xDF.  This mask is used to
-    // obtain the setting of this field.  If no bits in this field is set, there
-    // is no prefix byte for obtaining a multibyte opcode.
+    // opcodes.  These are currently 0x0F, 0xF3, and 0xD8-0xDF.  This mask is
+    // used to obtain the setting of this field.  If no bits in this field are
+    // set, there is no prefix byte for obtaining a multibyte opcode.
      //
      Op0Shift    = 6,
      Op0Mask     = 0xF << Op0Shift,
@@ -97,12 +97,16 @@ namespace X86II {
      // starts with a 0x0F byte before the real opcode.
      TB          = 1 << Op0Shift,
  
+    // REP - The 0xF3 prefix byte indicating repetition of the following
+    // instruction.
+    REP         = 2 << Op0Shift,
+
      // D8-DF - These escape opcodes are used by the floating point unit.  These
      // values must remain sequential.
-    D8 = 2 << Op0Shift,   D9 = 3 << Op0Shift,
-    DA = 4 << Op0Shift,   DB = 5 << Op0Shift,
-    DC = 6 << Op0Shift,   DD = 7 << Op0Shift,
-    DE = 8 << Op0Shift,   DF = 9 << Op0Shift,
+    D8 = 3 << Op0Shift,   D9 = 4 << Op0Shift,
+    DA = 5 << Op0Shift,   DB = 6 << Op0Shift,
+    DC = 7 << Op0Shift,   DD = 8 << Op0Shift,
+    DE = 9 << Op0Shift,   DF = 10 << Op0Shift,
  
      //===------------------------------------------------------------------===//
      // This three-bit field describes the size of a memory operand.  Zero is
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td

index 92f193f985049dbe25b1c89433166120d3a3d105..4bb1a9550a945021987b50e5d4e86670186075bf 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -92,14 +92,15 @@ class Pattern<dag P> {
  // emitter that various prefix bytes are required.
  class OpSize { bit hasOpSizePrefix = 1; }
  class TB     { bits<4> Prefix = 1; }
-class D8     { bits<4> Prefix = 2; }
-class D9     { bits<4> Prefix = 3; }
-class DA     { bits<4> Prefix = 4; }
-class DB     { bits<4> Prefix = 5; }
-class DC     { bits<4> Prefix = 6; }
-class DD     { bits<4> Prefix = 7; }
-class DE     { bits<4> Prefix = 8; }
-class DF     { bits<4> Prefix = 9; }
+class REP    { bits<4> Prefix = 2; }
+class D8     { bits<4> Prefix = 3; }
+class D9     { bits<4> Prefix = 4; }
+class DA     { bits<4> Prefix = 5; }
+class DB     { bits<4> Prefix = 6; }
+class DC     { bits<4> Prefix = 7; }
+class DD     { bits<4> Prefix = 8; }
+class DE     { bits<4> Prefix = 9; }
+class DF     { bits<4> Prefix = 10; }
  
  
  
@@ -172,6 +173,14 @@ def XCHGrr32 : X86Inst<"xchg", 0x87, MRMDestReg, Arg32>;        // xchg R32, R32
  def LEAr16 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg16>, OpSize; // R16 = lea [mem]
  def LEAr32 : X86Inst<"lea", 0x8D, MRMSrcMem, Arg32>;         // R32 = lea [mem]
  
+
+def REP_MOVSB : X86Inst<"rep movsb", 0xA4, RawFrm, NoArg>, REP,
+                Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+def REP_MOVSW : X86Inst<"rep movsw", 0xA5, RawFrm, NoArg>, REP, OpSize,
+                Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+def REP_MOVSD : X86Inst<"rep movsd", 0xA5, RawFrm, NoArg>, REP,
+                Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>;
+
  //===----------------------------------------------------------------------===//
  //  Move Instructions...
  //
author	Chris Lattner <sabre@nondot.org>
	Thu, 12 Feb 2004 17:53:22 +0000 (17:53 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Thu, 12 Feb 2004 17:53:22 +0000 (17:53 +0000)
lib/Target/X86/InstSelectSimple.cpp		patch \| blob \| history
lib/Target/X86/X86CodeEmitter.cpp		patch \| blob \| history
lib/Target/X86/X86ISelSimple.cpp		patch \| blob \| history
lib/Target/X86/X86InstrInfo.h		patch \| blob \| history
lib/Target/X86/X86InstrInfo.td		patch \| blob \| history