From: Kai Nacke <kai.nacke@redstar.de>
Date: Thu, 20 Mar 2014 11:51:58 +0000 (+0000)
Subject: [MIPS] Add cpu octeon and some instructions
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=ebf9f0c6cb5eb5a17bae7a24cfe380fe5801a60c;p=oota-llvm.git

[MIPS] Add cpu octeon and some instructions

The Octeon cpu from Cavium Networks is mips64r2 based and has an extended
instruction set. In order to utilize this with LLVM, a new cpu "octeon" and a
new subtarget feature "cnmips" are added. A small set of new instructions
(baddu, dmul, pop, dpop, seq, sne) is also added. LLVM generates dmul, pop and
dpop instructions with option -mcpu=octeon or -mattr=+cnmips.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204337 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index a9e8dca80b4..325700a3eac 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -83,6 +83,10 @@ def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
 def FeatureMicroMips  : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
                                          "microMips mode">;
 
+def FeatureCnMips     : SubtargetFeature<"cnmips", "HasCnMips",
+                                "true", "Octeon cnMIPS Support",
+                                [FeatureMips64r2]>;
+
 //===----------------------------------------------------------------------===//
 // Mips processors supported.
 //===----------------------------------------------------------------------===//
@@ -95,6 +99,7 @@ def : Proc<"mips32r2", [FeatureMips32r2, FeatureO32]>;
 def : Proc<"mips64", [FeatureMips64, FeatureN64]>;
 def : Proc<"mips64r2", [FeatureMips64r2, FeatureN64]>;
 def : Proc<"mips16", [FeatureMips16, FeatureO32]>;
+def : Proc<"octeon", [FeatureMips64r2, FeatureN64, FeatureCnMips]>;
 
 def MipsAsmParser : AsmParser {
   let ShouldEmitMatchRegisterName = 0;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 54d8e35b724..06a6c1ca521 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -227,6 +227,44 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
   def SLL64_64 : FR<0x0, 0x00, (outs GPR64:$rd), (ins GPR64:$rt),
                     "sll\t$rd, $rt, 0", [], II_SLL>;
 }
+
+// Cavium Octeon cnMIPS instructions
+let Predicates = [HasCnMips] in {
+
+class Count1s<string opstr, RegisterOperand RO>:
+  InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+         [(set RO:$rd, (ctpop RO:$rs))], II_POP, FrmR, opstr>;
+
+class SetCC64_R<string opstr, PatFrag cond_op> :
+  InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+         !strconcat(opstr, "\t$rd, $rs, $rt"),
+         [(set GPR64Opnd:$rd, (cond_op GPR64Opnd:$rs, GPR64Opnd:$rt))],
+         II_SEQ_SNE, FrmR, opstr>;
+
+// Unsigned Byte Add: selected for (and (add $rs, $rt), 255)
+def BADDu  : InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+                    "baddu\t$rd, $rs, $rt",
+                    [(set GPR64Opnd:$rd, (and (add GPR64Opnd:$rs,
+                                                   GPR64Opnd:$rt), 255))],
+                    II_BADDU, FrmR, "baddu">, ADD_FM<0x1c, 0x28> {
+  let isCommutable = 1;
+  let isReMaterializable = 1;
+}
+
+// Multiply Doubleword to GPR; HI0, LO0, P0, P1 and P2 are listed as Defs
+let Defs = [HI0, LO0, P0, P1, P2] in
+def DMUL  : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>,
+                              ADD_FM<0x1c, 0x03>;
+
+// Count Ones in a Word (pop) / Doubleword (dpop); both select ctpop
+def POP   : Count1s<"pop", GPR32Opnd>, POP_FM<0x2c>;
+def DPOP  : Count1s<"dpop", GPR64Opnd>, POP_FM<0x2d>;
+
+// Set on equal/not equal
+def SEQ   : SetCC64_R<"seq", seteq>, SEQ_FM<0x2a>;
+def SNE   : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>;
+}
+
 }
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 92250b82088..1e4cb4ac4fd 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -295,8 +295,13 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::FP_TO_UINT,        MVT::i32,   Expand);
   setOperationAction(ISD::FP_TO_UINT,        MVT::i64,   Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1,    Expand);
-  setOperationAction(ISD::CTPOP,             MVT::i32,   Expand);
-  setOperationAction(ISD::CTPOP,             MVT::i64,   Expand);
+  if (Subtarget->hasCnMips()) {
+    setOperationAction(ISD::CTPOP,           MVT::i32,   Legal);
+    setOperationAction(ISD::CTPOP,           MVT::i64,   Legal);
+  } else {
+    setOperationAction(ISD::CTPOP,           MVT::i32,   Expand);
+    setOperationAction(ISD::CTPOP,           MVT::i64,   Expand);
+  }
   setOperationAction(ISD::CTTZ,              MVT::i32,   Expand);
   setOperationAction(ISD::CTTZ,              MVT::i64,   Expand);
   setOperationAction(ISD::CTTZ_ZERO_UNDEF,   MVT::i32,   Expand);
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1ee4d1387ce..09fcd5ee8ff 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -489,6 +489,35 @@ class WAIT_FM : StdArch {
   let Inst{5-0}   = 0x20;
 }
 
+class POP_FM<bits<6> funct> : StdArch {
+  bits<5> rd;
+  bits<5> rs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x1c;
+  let Inst{25-21} = rs;
+  let Inst{20-16} = 0;
+  let Inst{15-11} = rd;
+  let Inst{10-6}  = 0;
+  let Inst{5-0}   = funct;
+}
+
+class SEQ_FM<bits<6> funct> : StdArch {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x1c;
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-11} = rd;
+  let Inst{10-6}  = 0;
+  let Inst{5-0}   = funct;
+}
+
 //===----------------------------------------------------------------------===//
 //  System calls format <op|code_|funct>
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3cf59dfb282..ed0b69eb094 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -170,6 +170,8 @@ def IsN64       :     Predicate<"Subtarget.isABI_N64()">,
                       AssemblerPredicate<"FeatureN64">;
 def InMips16Mode :    Predicate<"Subtarget.inMips16Mode()">,
                       AssemblerPredicate<"FeatureMips16">;
+def HasCnMips    :    Predicate<"Subtarget.hasCnMips()">,
+                      AssemblerPredicate<"FeatureCnMips">;
 def RelocStatic :     Predicate<"TM.getRelocationModel() == Reloc::Static">,
                       AssemblerPredicate<"FeatureMips32">;
 def RelocPIC    :     Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
@@ -772,7 +774,6 @@ class CountLeading1<string opstr, RegisterOperand RO>:
          [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>,
   Requires<[HasBitCount, HasStdEnc]>;
 
-
 // Sign Extend in Register.
 class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO,
                    InstrItinClass itin> :
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 3173d0927af..f5e0bf5f00b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -245,6 +245,15 @@ let Namespace = "Mips" in {
   def MSARequest : MipsReg<5, "5">;
   def MSAMap     : MipsReg<6, "6">;
   def MSAUnmap   : MipsReg<7, "7">;
+
+  // Octeon multiplier and product registers
+  def MPL0 : MipsReg<0, "mpl0">;
+  def MPL1 : MipsReg<1, "mpl1">;
+  def MPL2 : MipsReg<2, "mpl2">;
+  def P0 : MipsReg<0, "p0">;
+  def P1 : MipsReg<1, "p1">;
+  def P2 : MipsReg<2, "p2">;
+
 }
 
 //===----------------------------------------------------------------------===//
@@ -376,6 +385,12 @@ def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
 def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
            Unallocatable;
 
+// Octeon multiplier and product registers
+def OCTEON_MPL : RegisterClass<"Mips", [i64], 64, (add MPL0, MPL1, MPL2)>,
+                 Unallocatable;
+def OCTEON_P : RegisterClass<"Mips", [i64], 64, (add P0, P1, P2)>,
+               Unallocatable;
+
 // Register Operands.
 
 class MipsAsmRegOperand : AsmOperandClass {
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index cb6a5574f02..c40426d0e20 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -117,10 +117,14 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::MULHS,              MVT::i32, Custom);
   setOperationAction(ISD::MULHU,              MVT::i32, Custom);
 
+  if (Subtarget->hasCnMips())
+    setOperationAction(ISD::MUL,              MVT::i64, Legal);
+  else if (HasMips64)
+    setOperationAction(ISD::MUL,              MVT::i64, Custom);
+
   if (HasMips64) {
     setOperationAction(ISD::MULHS,            MVT::i64, Custom);
     setOperationAction(ISD::MULHU,            MVT::i64, Custom);
-    setOperationAction(ISD::MUL,              MVT::i64, Custom);
   }
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index b6a9652c045..e2fef8f94c6 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -28,6 +28,7 @@ def II_ADD_D            : InstrItinClass;
 def II_ADD_S            : InstrItinClass;
 def II_AND              : InstrItinClass;
 def II_ANDI             : InstrItinClass;
+def II_BADDU            : InstrItinClass;
 def II_CEIL             : InstrItinClass;
 def II_CFC1             : InstrItinClass;
 def II_CLO              : InstrItinClass;
@@ -46,6 +47,7 @@ def II_DIV_D            : InstrItinClass;
 def II_DIV_S            : InstrItinClass;
 def II_DMFC1            : InstrItinClass;
 def II_DMTC1            : InstrItinClass;
+def II_DMUL             : InstrItinClass;
 def II_DMULT            : InstrItinClass;
 def II_DMULTU           : InstrItinClass;
 def II_DROTR            : InstrItinClass;
@@ -120,6 +122,7 @@ def II_NMSUB_S          : InstrItinClass;
 def II_NOR              : InstrItinClass;
 def II_OR               : InstrItinClass;
 def II_ORI              : InstrItinClass;
+def II_POP              : InstrItinClass;
 def II_RDHWR            : InstrItinClass;
 def II_RESTORE          : InstrItinClass;
 def II_ROTR             : InstrItinClass;
@@ -134,6 +137,8 @@ def II_SDR              : InstrItinClass;
 def II_SDXC1            : InstrItinClass;
 def II_SEB              : InstrItinClass;
 def II_SEH              : InstrItinClass;
+def II_SEQ_SNE          : InstrItinClass; // seq and sne
+def II_SEQI_SNEI        : InstrItinClass; // seqi and snei
 def II_SH               : InstrItinClass;
 def II_SLL              : InstrItinClass;
 def II_SLLV             : InstrItinClass;
@@ -167,6 +172,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
   InstrItinData<II_ADDIU           , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_ADDU            , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_AND             , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_BADDU           , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_SLL             , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_SRA             , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_SRL             , [InstrStage<1,  [ALU]>]>,
@@ -197,6 +203,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
   InstrItinData<II_MOVZ            , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_NOR             , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_OR              , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_POP             , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_RDHWR           , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_SUBU            , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_XOR             , [InstrStage<1,  [ALU]>]>,
@@ -223,7 +230,10 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
   InstrItinData<II_SDR             , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_SD              , [InstrStage<1,  [ALU]>]>,
   InstrItinData<II_SAVE            , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_SEQ_SNE         , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_SEQI_SNEI       , [InstrStage<1,  [ALU]>]>,
   InstrItinData<IIBranch           , [InstrStage<1,  [ALU]>]>,
+  InstrItinData<II_DMUL            , [InstrStage<17, [IMULDIV]>]>,
   InstrItinData<II_DMULT           , [InstrStage<17, [IMULDIV]>]>,
   InstrItinData<II_DMULTU          , [InstrStage<17, [IMULDIV]>]>,
   InstrItinData<II_MADD            , [InstrStage<17, [IMULDIV]>]>,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index af83d61f091..b0565f80026 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -81,8 +81,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
   MipsGenSubtargetInfo(TT, CPU, FS),
   MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
   IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
-  IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
-  HasBitCount(false), HasFPIdx(false),
+  HasCnMips(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
+  HasSwap(false), HasBitCount(false), HasFPIdx(false),
   InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
   InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
   AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ba1e1452d24..2904891e789 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -65,6 +65,9 @@ protected:
   // HasVFPU - Processor has a vector floating point unit.
   bool HasVFPU;
 
+  // CPU supports cnMIPS (Cavium Networks Octeon CPU).
+  bool HasCnMips;
+
   // isLinux - Target system is Linux. Is false we consider ELFOS for now.
   bool IsLinux;
 
@@ -154,6 +157,8 @@ public:
   bool hasMips64() const { return MipsArchVersion >= Mips64; }
   bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
 
+  bool hasCnMips() const { return HasCnMips; }
+
   bool isLittle() const { return IsLittle; }
   bool isFP64bit() const { return IsFP64bit; }
   bool isNotFP64bit() const { return !IsFP64bit; }
diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll
new file mode 100644
index 00000000000..092938a6b5c
--- /dev/null
+++ b/test/CodeGen/Mips/octeon.ll
@@ -0,0 +1,15 @@
+; RUN: llc -O1 < %s -march=mips64 -mcpu=octeon | FileCheck %s -check-prefix=OCTEON
+; RUN: llc -O1 < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=MIPS64
+
+define i64 @mul(i64 %a, i64 %b) nounwind {
+entry:
+; OCTEON-LABEL: mul:
+; OCTEON: jr    $ra
+; OCTEON: dmul  $2, $4, $5
+; MIPS64-LABEL: mul:
+; MIPS64: dmult
+; MIPS64: jr
+; MIPS64: mflo
+  %res = mul i64 %a, %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/Mips/octeon_popcnt.ll b/test/CodeGen/Mips/octeon_popcnt.ll
new file mode 100644
index 00000000000..52c37f69d02
--- /dev/null
+++ b/test/CodeGen/Mips/octeon_popcnt.ll
@@ -0,0 +1,47 @@
+; RUN: llc -O1 -march=mips64 -mcpu=octeon < %s | FileCheck %s -check-prefix=OCTEON
+; RUN: llc -O1 -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=MIPS64
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; OCTEON-LABEL: cnt8:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $1
+; MIPS64-LABEL: cnt8:
+; MIPS64-NOT: pop
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; OCTEON-LABEL: cnt16:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $1
+; MIPS64-LABEL: cnt16:
+; MIPS64-NOT: pop
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; OCTEON-LABEL: cnt32:
+; OCTEON: jr   $ra
+; OCTEON: pop  $2, $4
+; MIPS64-LABEL: cnt32:
+; MIPS64-NOT: pop
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; OCTEON-LABEL: cnt64:
+; OCTEON: jr   $ra
+; OCTEON: dpop $2, $4
+; MIPS64-LABEL: cnt64:
+; MIPS64-NOT: dpop
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/MC/Mips/octeon-instructions.s b/test/MC/Mips/octeon-instructions.s
new file mode 100644
index 00000000000..7da582916fc
--- /dev/null
+++ b/test/MC/Mips/octeon-instructions.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc  %s -triple=mips64-unknown-linux -show-encoding -mcpu=octeon | FileCheck %s
+
+# CHECK: baddu $9, $6, $7             # encoding: [0x70,0xc7,0x48,0x28]
+# CHECK: baddu $17, $18, $19          # encoding: [0x72,0x53,0x88,0x28]
+# CHECK: dmul  $9, $6, $7             # encoding: [0x70,0xc7,0x48,0x03]
+# CHECK: dmul  $19, $24, $25          # encoding: [0x73,0x19,0x98,0x03]
+# CHECK: dpop  $9, $6                 # encoding: [0x70,0xc0,0x48,0x2d]
+# CHECK: dpop  $15, $22               # encoding: [0x72,0xc0,0x78,0x2d]
+# CHECK: pop   $9, $6                 # encoding: [0x70,0xc0,0x48,0x2c]
+# CHECK: pop   $8, $19                # encoding: [0x72,0x60,0x40,0x2c]
+# CHECK: seq   $25, $23, $24          # encoding: [0x72,0xf8,0xc8,0x2a]
+# CHECK: sne   $25, $23, $24          # encoding: [0x72,0xf8,0xc8,0x2b]
+
+  baddu $9, $6, $7
+  baddu $17, $18, $19
+  dmul  $9, $6, $7
+  dmul  $19, $24, $25
+  dpop  $9, $6
+  dpop  $15, $22
+  pop   $9, $6
+  pop   $8, $19
+  seq   $25, $23, $24
+  sne   $25, $23, $24