Add the PPC64 popcntd instruction
author Hal Finkel <hfinkel@anl.gov>
Thu, 28 Mar 2013 13:29:47 +0000 (13:29 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Thu, 28 Mar 2013 13:29:47 +0000 (13:29 +0000)
PPC ISA 2.06 (P7, A2, etc.) has a popcntd instruction. Add this instruction and
tell TTI about it so that popcount-loop recognition will know about it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178233 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/PowerPC/PPC.td
lib/Target/PowerPC/PPCISelLowering.cpp
lib/Target/PowerPC/PPCInstr64Bit.td
lib/Target/PowerPC/PPCSubtarget.cpp
lib/Target/PowerPC/PPCSubtarget.h
lib/Target/PowerPC/PPCTargetTransformInfo.cpp
test/CodeGen/PowerPC/popcnt.ll [new file with mode: 0644]

index 992913602a10d05bdb9f4202aa101301660a8d1d..dfab690a9d42522e24d845eb0df9a92a6afc648a 100644 (file)
@@ -61,6 +61,8 @@ def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
                                         "Enable the stfiwx instruction">;
 def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                         "Enable the isel instruction">;
+def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+                                        "Enable the popcnt[dw] instructions">;
 def FeatureBookE     : SubtargetFeature<"booke", "IsBookE", "true",
                                         "Enable Book E instructions">;
 def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
@@ -79,7 +81,6 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 // LFIWAX       p6, p6x, p7        lfiwax
 // LFIWZX       p7                 lfiwzx
 // POPCNTB      p5 through p7      popcntb and related instructions
-// POPCNTD      p7                 popcntd and related instructions
 // RECIP_PREC   p6, p6x, p7        higher precision reciprocal estimates
 // VSX          p7                 vector-scalar instruction set
 
@@ -131,12 +132,13 @@ def : ProcessorModel<"e5500", PPCE5500Model,
 def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
                                          FeatureMFOCRF, FeatureFSqrt,
                                          FeatureSTFIWX, FeatureISEL,
-                                         Feature64Bit
+                                         FeaturePOPCNTD, Feature64Bit
                                      /*, Feature64BitRegs */]>;
 def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
                                           FeatureMFOCRF, FeatureFSqrt,
                                           FeatureSTFIWX, FeatureISEL,
-                                          Feature64Bit /*, Feature64BitRegs */,
+                                          FeaturePOPCNTD, Feature64Bit
+                                      /*, Feature64BitRegs */,
                                           FeatureQPX]>;
 def : Processor<"pwr3", G5Itineraries,
                   [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
@@ -160,7 +162,8 @@ def : Processor<"pwr6x", G5Itineraries,
 def : Processor<"pwr7", G5Itineraries,
                   [DirectivePwr7, FeatureAltivec,
                    FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureISEL, FeaturePOPCNTD, Feature64Bit
+               /*, Feature64BitRegs */]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
 def : Processor<"ppc64", G5Itineraries,
                   [Directive64, FeatureAltivec,
index c0601ad4e7a27d88a212911c61e8e53fec9e366d..3275315a6a46dd3810ff9d531b0f3b20e7ac4344 100644 (file)
@@ -160,16 +160,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 
   // PowerPC does not have BSWAP, CTPOP or CTTZ
   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
-  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
-  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
   setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
 
+  if (Subtarget->hasPOPCNTD()) {
+    setOperationAction(ISD::CTPOP, MVT::i32  , Promote);
+    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
+  } else {
+    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
+    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
+  }
+
   // PowerPC does not have ROTR
   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
index 2a480d9a21274465a838e2b10265ba235c278842..0fbb11d1338a8d0b5228d10e2261c3c9070bd4b6 100644 (file)
@@ -468,6 +468,9 @@ def SRADI  : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
 def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
                       "cntlzd $rA, $rS", IntGeneral,
                       [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+                      "popcntd $rA, $rS", IntGeneral,
+                      [(set i64:$rA, (ctpop i64:$rS))]>;
 
 def DIVD  : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
                      "divd $rT, $rA, $rB", IntDivD,
index 18e4c07942b966fef69ab938f06dfb749c819af1..11b7fc2b03b1bbc1804ac904336beef4bccad3f6 100644 (file)
@@ -40,6 +40,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
   , HasFSQRT(false)
   , HasSTFIWX(false)
   , HasISEL(false)
+  , HasPOPCNTD(false)
   , IsBookE(false)
   , HasLazyResolverStubs(false)
   , IsJITCodeModel(false)
index 15885bd2dfb270109e705b0d4b1dcdc5c106b031..070a9a9485f21b35f3df4233038626aa40ca72cb 100644 (file)
@@ -79,6 +79,7 @@ protected:
   bool HasFSQRT;
   bool HasSTFIWX;
   bool HasISEL;
+  bool HasPOPCNTD;
   bool IsBookE;
   bool HasLazyResolverStubs;
   bool IsJITCodeModel;
@@ -159,6 +160,7 @@ public:
   bool hasQPX() const { return HasQPX; }
   bool hasMFOCRF() const { return HasMFOCRF; }
   bool hasISEL() const { return HasISEL; }
+  bool hasPOPCNTD() const { return HasPOPCNTD; }
   bool isBookE() const { return IsBookE; }
 
   const Triple &getTargetTriple() const { return TargetTriple; }
index 5e9ad347d33235b72fdd3accb5aba7e0a52c6ad3..00037edafc922188b63f0113c6c49927d0a47c39 100644 (file)
@@ -122,9 +122,8 @@ llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
 
 PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  // FIXME: PPC currently does not have custom popcnt lowering even though
-  // there is hardware support. Once this is fixed, update this function
-  // to reflect the real capabilities of the hardware.
+  if (ST->hasPOPCNTD() && TyWidth <= 64)
+    return PSK_FastHardware;
   return PSK_Software;
 }
 
diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll
new file mode 100644 (file)
index 0000000..363f705
--- /dev/null
@@ -0,0 +1,41 @@
+; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+  ret i8 %cnt
+; CHECK: @cnt8
+; CHECK: rldicl
+; CHECK: popcntd
+; CHECK: blr
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+  ret i16 %cnt
+; CHECK: @cnt16
+; CHECK: rldicl
+; CHECK: popcntd
+; CHECK: blr
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  ret i32 %cnt
+; CHECK: @cnt32
+; CHECK: rldicl
+; CHECK: popcntd
+; CHECK: blr
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+  ret i64 %cnt
+; CHECK: @cnt64
+; CHECK: popcntd
+; CHECK: blr
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone