From: Kai Nacke Date: Thu, 20 Mar 2014 11:51:58 +0000 (+0000) Subject: [MIPS] Add cpu octeon and some instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=ebf9f0c6cb5eb5a17bae7a24cfe380fe5801a60c;p=oota-llvm.git [MIPS] Add cpu octeon and some instructions The Octeon cpu from Cavium Networks is mips64r2 based and has an extended instruction set. In order to utilize this with LLVM, a new cpu feature "octeon" and a subtarget feature "cnmips" is added. A small set of new instructions (baddu, dmul, pop, dpop, seq, sne) is also added. LLVM generates dmul, pop and dpop instructions with option -mcpu=octeon or -mattr=+cnmips. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204337 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index a9e8dca80b4..325700a3eac 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -83,6 +83,10 @@ def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">; def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true", "microMips mode">; +def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips", + "true", "Octeon cnMIPS Support", + [FeatureMips64r2]>; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// @@ -95,6 +99,7 @@ def : Proc<"mips32r2", [FeatureMips32r2, FeatureO32]>; def : Proc<"mips64", [FeatureMips64, FeatureN64]>; def : Proc<"mips64r2", [FeatureMips64r2, FeatureN64]>; def : Proc<"mips16", [FeatureMips16, FeatureO32]>; +def : Proc<"octeon", [FeatureMips64r2, FeatureN64, FeatureCnMips]>; def MipsAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 54d8e35b724..06a6c1ca521 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -227,6 +227,44 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def SLL64_64 : FR<0x0, 0x00, (outs GPR64:$rd), (ins GPR64:$rt), "sll\t$rd, $rt, 0", [], II_SLL>; } + +// Cavium Octeon cmMIPS instructions +let Predicates = [HasCnMips] in { + +class Count1s: + InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctpop RO:$rs))], II_POP, FrmR, opstr>; + +class SetCC64_R : + InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), + !strconcat(opstr, "\t$rd, $rs, $rt"), + [(set GPR64Opnd:$rd, (cond_op GPR64Opnd:$rs, GPR64Opnd:$rt))], + II_SEQ_SNE, FrmR, opstr>; + +// Unsigned Byte Add +def BADDu : InstSE<(outs GPR64Opnd:$rd), (ins GPR64Opnd:$rs, GPR64Opnd:$rt), + "baddu\t$rd, $rs, $rt", + [(set GPR64Opnd:$rd, (and (add GPR64Opnd:$rs, + GPR64Opnd:$rt), 255))], + II_BADDU, FrmR, "baddu">, ADD_FM<0x1c, 0x28> { + let isCommutable = 1; + let isReMaterializable = 1; +} + +// Multiply Doubleword to GPR +let Defs = [HI0, LO0, P0, P1, P2] in +def DMUL : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>, + ADD_FM<0x1c, 0x03>; + +// Count Ones in a Word/Doubleword +def POP : Count1s<"pop", GPR32Opnd>, POP_FM<0x2c>; +def DPOP : Count1s<"dpop", GPR64Opnd>, POP_FM<0x2d>; + +// Set on equal/not equal +def SEQ : SetCC64_R<"seq", seteq>, SEQ_FM<0x2a>; +def SNE : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>; +} + } //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 92250b82088..1e4cb4ac4fd 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -295,8 +295,13 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); + if (Subtarget->hasCnMips()) { + setOperationAction(ISD::CTPOP, MVT::i32, Legal); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + } else { + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + } setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 1ee4d1387ce..09fcd5ee8ff 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -489,6 +489,35 @@ class WAIT_FM : StdArch { let Inst{5-0} = 0x20; } +class POP_FM funct> : StdArch { + bits<5> rd; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0x1c; + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +class SEQ_FM funct> : StdArch { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x1c; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + //===----------------------------------------------------------------------===// // System calls format //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3cf59dfb282..ed0b69eb094 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -170,6 +170,8 @@ def IsN64 : Predicate<"Subtarget.isABI_N64()">, AssemblerPredicate<"FeatureN64">; def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; +def HasCnMips : Predicate<"Subtarget.hasCnMips()">, + AssemblerPredicate<"FeatureCnMips">; def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, AssemblerPredicate<"FeatureMips32">; def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, @@ -772,7 +774,6 @@ class CountLeading1: [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>, Requires<[HasBitCount, HasStdEnc]>; - // Sign Extend in Register. class SignExtInReg : diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 3173d0927af..f5e0bf5f00b 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -245,6 +245,15 @@ let Namespace = "Mips" in { def MSARequest : MipsReg<5, "5">; def MSAMap : MipsReg<6, "6">; def MSAUnmap : MipsReg<7, "7">; + + // Octeon multiplier and product registers + def MPL0 : MipsReg<0, "mpl0">; + def MPL1 : MipsReg<1, "mpl1">; + def MPL2 : MipsReg<2, "mpl2">; + def P0 : MipsReg<0, "p0">; + def P1 : MipsReg<1, "p1">; + def P2 : MipsReg<2, "p2">; + } //===----------------------------------------------------------------------===// @@ -376,6 +385,12 @@ def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>, Unallocatable; +// Octeon multiplier and product registers +def OCTEON_MPL : RegisterClass<"Mips", [i64], 64, (add MPL0, MPL1, MPL2)>, + Unallocatable; +def OCTEON_P : RegisterClass<"Mips", [i64], 64, (add P0, P1, P2)>, + Unallocatable; + // Register Operands. class MipsAsmRegOperand : AsmOperandClass { diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index cb6a5574f02..c40426d0e20 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -117,10 +117,14 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MULHS, MVT::i32, Custom); setOperationAction(ISD::MULHU, MVT::i32, Custom); + if (Subtarget->hasCnMips()) + setOperationAction(ISD::MUL, MVT::i64, Legal); + else if (HasMips64) + setOperationAction(ISD::MUL, MVT::i64, Custom); + if (HasMips64) { setOperationAction(ISD::MULHS, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Custom); - setOperationAction(ISD::MUL, MVT::i64, Custom); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index b6a9652c045..e2fef8f94c6 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -28,6 +28,7 @@ def II_ADD_D : InstrItinClass; def II_ADD_S : InstrItinClass; def II_AND : InstrItinClass; def II_ANDI : InstrItinClass; +def II_BADDU : InstrItinClass; def II_CEIL : InstrItinClass; def II_CFC1 : InstrItinClass; def II_CLO : InstrItinClass; @@ -46,6 +47,7 @@ def II_DIV_D : InstrItinClass; def II_DIV_S : InstrItinClass; def II_DMFC1 : InstrItinClass; def II_DMTC1 : InstrItinClass; +def II_DMUL : InstrItinClass; def II_DMULT : InstrItinClass; def II_DMULTU : InstrItinClass; def II_DROTR : InstrItinClass; @@ -120,6 +122,7 @@ def II_NMSUB_S : InstrItinClass; def II_NOR : InstrItinClass; def II_OR : InstrItinClass; def II_ORI : InstrItinClass; +def II_POP : InstrItinClass; def II_RDHWR : InstrItinClass; def II_RESTORE : InstrItinClass; def II_ROTR : InstrItinClass; @@ -134,6 +137,8 @@ def II_SDR : InstrItinClass; def II_SDXC1 : InstrItinClass; def II_SEB : InstrItinClass; def II_SEH : InstrItinClass; +def II_SEQ_SNE : InstrItinClass; // seq and sne +def II_SEQI_SNEI : InstrItinClass; // seqi and snei def II_SH : InstrItinClass; def II_SLL : InstrItinClass; def II_SLLV : InstrItinClass; @@ -167,6 +172,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -197,6 +203,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -223,7 +230,10 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, + InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index af83d61f091..b0565f80026 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -81,8 +81,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), - IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), - HasBitCount(false), HasFPIdx(false), + HasCnMips(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), + HasSwap(false), HasBitCount(false), HasFPIdx(false), InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ba1e1452d24..2904891e789 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -65,6 +65,9 @@ protected: // HasVFPU - Processor has a vector floating point unit. bool HasVFPU; + // CPU supports cnMIPS (Cavium Networks Octeon CPU). + bool HasCnMips; + // isLinux - Target system is Linux. Is false we consider ELFOS for now. bool IsLinux; @@ -154,6 +157,8 @@ public: bool hasMips64() const { return MipsArchVersion >= Mips64; } bool hasMips64r2() const { return MipsArchVersion == Mips64r2; } + bool hasCnMips() const { return HasCnMips; } + bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } bool isNotFP64bit() const { return !IsFP64bit; } diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll new file mode 100644 index 00000000000..092938a6b5c --- /dev/null +++ b/test/CodeGen/Mips/octeon.ll @@ -0,0 +1,15 @@ +; RUN: llc -O1 < %s -march=mips64 -mcpu=octeon | FileCheck %s -check-prefix=OCTEON +; RUN: llc -O1 < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=MIPS64 + +define i64 @mul(i64 %a, i64 %b) nounwind { +entry: +; OCTEON-LABEL: mul: +; OCTEON: jr $ra +; OCTEON: dmul $2, $4, $5 +; MIPS64-LABEL: mul: +; MIPS64: dmult +; MIPS64: jr +; MIPS64: mflo + %res = mul i64 %a, %b + ret i64 %res +} diff --git a/test/CodeGen/Mips/octeon_popcnt.ll b/test/CodeGen/Mips/octeon_popcnt.ll new file mode 100644 index 00000000000..52c37f69d02 --- /dev/null +++ b/test/CodeGen/Mips/octeon_popcnt.ll @@ -0,0 +1,47 @@ +; RUN: llc -O1 -march=mips64 -mcpu=octeon < %s | FileCheck %s -check-prefix=OCTEON +; RUN: llc -O1 -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=MIPS64 + +define i8 @cnt8(i8 %x) nounwind readnone { + %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) + ret i8 %cnt +; OCTEON-LABEL: cnt8: +; OCTEON: jr $ra +; OCTEON: pop $2, $1 +; MIPS64-LABEL: cnt8: +; MIPS64-NOT: pop +} + +define i16 @cnt16(i16 %x) nounwind readnone { + %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) + ret i16 %cnt +; OCTEON-LABEL: cnt16: +; OCTEON: jr $ra +; OCTEON: pop $2, $1 +; MIPS64-LABEL: cnt16: +; MIPS64-NOT: pop +} + +define i32 @cnt32(i32 %x) nounwind readnone { + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +; OCTEON-LABEL: cnt32: +; OCTEON: jr $ra +; OCTEON: pop $2, $4 +; MIPS64-LABEL: cnt32: +; MIPS64-NOT: pop +} + +define i64 @cnt64(i64 %x) nounwind readnone { + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +; OCTEON-LABEL: cnt64: +; OCTEON: jr $ra +; OCTEON: dpop $2, $4 +; MIPS64-LABEL: cnt64: +; MIPS64-NOT: dpop +} + +declare i8 @llvm.ctpop.i8(i8) nounwind readnone +declare i16 @llvm.ctpop.i16(i16) nounwind readnone +declare i32 @llvm.ctpop.i32(i32) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone diff --git a/test/MC/Mips/octeon-instructions.s b/test/MC/Mips/octeon-instructions.s new file mode 100644 index 00000000000..7da582916fc --- /dev/null +++ b/test/MC/Mips/octeon-instructions.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=octeon | FileCheck %s + +# CHECK: baddu $9, $6, $7 # encoding: [0x70,0xc7,0x48,0x28] +# CHECK: baddu $17, $18, $19 # encoding: [0x72,0x53,0x88,0x28] +# CHECK: dmul $9, $6, $7 # encoding: [0x70,0xc7,0x48,0x03] +# CHECK: dmul $19, $24, $25 # encoding: [0x73,0x19,0x98,0x03] +# CHECK: dpop $9, $6 # encoding: [0x70,0xc0,0x48,0x2d] +# CHECK: dpop $15, $22 # encoding: [0x72,0xc0,0x78,0x2d] +# CHECK: pop $9, $6 # encoding: [0x70,0xc0,0x48,0x2c] +# CHECK: pop $8, $19 # encoding: [0x72,0x60,0x40,0x2c] +# CHECK: seq $25, $23, $24 # encoding: [0x72,0xf8,0xc8,0x2a] +# CHECK: sne $25, $23, $24 # encoding: [0x72,0xf8,0xc8,0x2b] + + baddu $9, $6, $7 + baddu $17, $18, $19 + dmul $9, $6, $7 + dmul $19, $24, $25 + dpop $9, $6 + dpop $15, $22 + pop $9, $6 + pop $8, $19 + seq $25, $23, $24 + sne $25, $23, $24