fix expansion of ct[lt]z nodes

[oota-llvm.git] / lib / CodeGen / SelectionDAG / LegalizeDAG.cpp
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

index 282dfd11ca9ce6166d4ee13136977cd97f614555..135045582db63b9ec48516c6bfe786acb6ed5fa3 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -130,6 +130,8 @@ private:
    void ExpandByParts(unsigned NodeOp, SDOperand LHS, SDOperand RHS,
                       SDOperand &Lo, SDOperand &Hi);
  
+  void SpliceCallInto(const SDOperand &CallResult, SDNode *OutChain);
+
    SDOperand getIntPtrConstant(uint64_t Val) {
      return DAG.getConstant(Val, TLI.getPointerTy());
    }
@@ -160,12 +162,13 @@ void SelectionDAGLegalize::LegalizeDAG() {
  SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
    assert(getTypeAction(Op.getValueType()) == Legal &&
           "Caller should expand or promote operands that are not legal!");
+  SDNode *Node = Op.Val;
  
    // If this operation defines any values that cannot be represented in a
    // register on this target, make sure to expand or promote them.
-  if (Op.Val->getNumValues() > 1) {
-    for (unsigned i = 0, e = Op.Val->getNumValues(); i != e; ++i)
-      switch (getTypeAction(Op.Val->getValueType(i))) {
+  if (Node->getNumValues() > 1) {
+    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+      switch (getTypeAction(Node->getValueType(i))) {
        case Legal: break;  // Nothing to do.
        case Expand: {
          SDOperand T1, T2;
@@ -182,13 +185,14 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
        }
    }
  
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
    std::map<SDOperand, SDOperand>::iterator I = LegalizedNodes.find(Op);
    if (I != LegalizedNodes.end()) return I->second;
  
    SDOperand Tmp1, Tmp2, Tmp3;
  
    SDOperand Result = Op;
-  SDNode *Node = Op.Val;
  
    switch (Node->getOpcode()) {
    default:
@@ -304,6 +308,7 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
      for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
        SDOperand Op = Node->getOperand(i);
        // Fold single-use TokenFactor nodes into this token factor as we go.
+      // FIXME: This is something that the DAGCombiner should do!!
        if (Op.getOpcode() == ISD::TokenFactor && Op.hasOneUse()) {
          Changed = true;
          for (unsigned j = 0, e = Op.getNumOperands(); j != e; ++j)
@@ -322,9 +327,22 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
    case ISD::ADJCALLSTACKUP:
      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
      // There is no need to legalize the size argument (Operand #1)
-    if (Tmp1 != Node->getOperand(0))
-      Result = DAG.getNode(Node->getOpcode(), MVT::Other, Tmp1,
-                           Node->getOperand(1));
+    Tmp2 = Node->getOperand(0);
+    if (Tmp1 != Tmp2) {
+      Node->setAdjCallChain(Tmp1);
+
+      // If moving the operand from pointing to Tmp2 dropped its use count to 1,
+      // this will cause the maps used to memoize results to get confused.
+      // Create and add a dummy use, just to increase its use count.  This will
+      // be removed at the end of legalize when dead nodes are removed.
+      if (Tmp2.Val->hasOneUse())
+        DAG.getNode(ISD::PCMARKER, MVT::Other, Tmp2,
+                    DAG.getConstant(0, MVT::i32));
+    }
+    // Note that we do not create new ADJCALLSTACK DOWN/UP nodes here.  These
+    // nodes are treated specially and are mutated in place.  This makes the dag
+    // legalization process more efficient and also makes libcall insertion
+    // easier.
      break;
    case ISD::DYNAMIC_STACKALLOC:
      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
@@ -635,16 +653,16 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
        if (!TLI.isLittleEndian())
          std::swap(Lo, Hi);
  
-      Lo = DAG.getNode(ISD::STORE, MVT::Other,Tmp1, Lo, Tmp2,Node->getOperand(3));
-
+      Lo = DAG.getNode(ISD::STORE, MVT::Other, Tmp1, Lo, Tmp2,
+                       Node->getOperand(3));
        unsigned IncrementSize = MVT::getSizeInBits(Hi.getValueType())/8;
        Tmp2 = DAG.getNode(ISD::ADD, Tmp2.getValueType(), Tmp2,
                           getIntPtrConstant(IncrementSize));
        assert(isTypeLegal(Tmp2.getValueType()) &&
               "Pointers must be legal!");
        //Again, claiming both parts of the store came form the same Instr
-      Hi = DAG.getNode(ISD::STORE, MVT::Other, Tmp1, Hi, Tmp2, Node->getOperand(3));
-                              
+      Hi = DAG.getNode(ISD::STORE, MVT::Other, Tmp1, Hi, Tmp2,
+                       Node->getOperand(3));
        Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo, Hi);
        break;
      }
@@ -908,6 +926,7 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
        } else {
          assert(0 && "Unknown op!");
        }
+
        std::pair<SDOperand,SDOperand> CallResult =
          TLI.LowerCallTo(Tmp1, Type::VoidTy, false,
                          DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
@@ -1079,7 +1098,8 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
      case TargetLowering::Promote: {
        MVT::ValueType OVT = Tmp1.getValueType();
        MVT::ValueType NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
-      //Zero extend the argument
+
+      // Zero extend the argument.
        Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, NVT, Tmp1);
        // Perform the larger operation, then subtract if needed.
        Tmp1 = DAG.getNode(Node->getOpcode(), Node->getValueType(0), Tmp1);
@@ -1090,7 +1110,7 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
          break;
        case ISD::CTTZ:
          //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
-        Tmp2 = DAG.getSetCC(ISD::SETEQ, MVT::i1, Tmp1, 
+        Tmp2 = DAG.getSetCC(ISD::SETEQ, TLI.getSetCCResultTy(), Tmp1, 
                              DAG.getConstant(getSizeInBits(NVT), NVT));
          Result = DAG.getNode(ISD::SELECT, NVT, Tmp2, 
                             DAG.getConstant(getSizeInBits(OVT),NVT), Tmp1);
@@ -1110,19 +1130,18 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
        switch(Node->getOpcode())
        {
        case ISD::CTPOP: {
-        static const uint64_t mask[6][9] = {
-          {0, 0x55, 0x5555, 0, 0x55555555, 0, 0, 0, 0x5555555555555555ULL},
-          {0, 0x33, 0x3333, 0, 0x33333333, 0, 0, 0, 0x3333333333333333ULL},
-          {0, 0x0F, 0x0F0F, 0, 0x0F0F0F0F, 0, 0, 0, 0x0F0F0F0F0F0F0F0FULL},
-          {0,    0, 0x00FF, 0, 0x00FF00FF, 0, 0, 0, 0x00FF00FF00FF00FFULL},
-          {0,    0,      0, 0, 0x0000FFFF, 0, 0, 0, 0x0000FFFF0000FFFFULL},
-          {0,    0,      0, 0,          0, 0, 0, 0, 0x00000000FFFFFFFFULL}};
+        static const uint64_t mask[6] = {
+          0x5555555555555555ULL, 0x3333333333333333ULL,
+          0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+          0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+        };
          MVT::ValueType VT = Tmp1.getValueType();
-        int len = getSizeInBits(VT);
-        for (int i = 0; (1 << i) <= (len / 2); ++i) {
+        MVT::ValueType ShVT = TLI.getShiftAmountTy();
+        unsigned len = getSizeInBits(VT);
+        for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
            //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
-          Tmp2 = DAG.getConstant(mask[i][len/8], VT);
-          Tmp3 = DAG.getConstant(1 << i, VT);
+          Tmp2 = DAG.getConstant(mask[i], VT);
+          Tmp3 = DAG.getConstant(1ULL << i, ShVT);
            Tmp1 = DAG.getNode(ISD::ADD, VT, 
                               DAG.getNode(ISD::AND, VT, Tmp1, Tmp2),
                               DAG.getNode(ISD::AND, VT,
@@ -1132,10 +1151,50 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
          Result = Tmp1;
          break;
        }
-//       case ISD::CTTZ:
-//         break;
-//       case ISD::CTLZ:
-//         break;
+      case ISD::CTLZ: {
+        /* for now, we do this:
+           x = x | (x >> 1);
+           x = x | (x >> 2);
+           ...
+           x = x | (x >>16);
+           x = x | (x >>32); // for 64-bit input 
+           return popcount(~x);
+        
+           but see also: http://www.hackersdelight.org/HDcode/nlz.cc */
+        MVT::ValueType VT = Tmp1.getValueType();
+        MVT::ValueType ShVT = TLI.getShiftAmountTy();
+        unsigned len = getSizeInBits(VT);
+        for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+          Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+          Tmp1 = DAG.getNode(ISD::OR, VT, Tmp1,  
+                             DAG.getNode(ISD::SRL, VT, Tmp1, Tmp3));
+        }
+        Tmp3 = DAG.getNode(ISD::XOR, VT, Tmp1, DAG.getConstant(~0ULL, VT));
+        Result = LegalizeOp(DAG.getNode(ISD::CTPOP, VT, Tmp3));
+        break;
+      }
+      case ISD::CTTZ: {
+        // for now, we use: { return popcount(~x & (x - 1)); } 
+        // unless the target has ctlz but not ctpop, in which case we use:
+        // { return 32 - nlz(~x & (x-1)); }
+        // see also http://www.hackersdelight.org/HDcode/ntz.cc
+        MVT::ValueType VT = Tmp1.getValueType();
+        Tmp2 = DAG.getConstant(~0ULL, VT);
+        Tmp3 = DAG.getNode(ISD::AND, VT, 
+                           DAG.getNode(ISD::XOR, VT, Tmp1, Tmp2),
+                           DAG.getNode(ISD::SUB, VT, Tmp1,
+                                       DAG.getConstant(1, VT)));
+        // If ISD::CTLZ is legal and CTPOP isn't, then do that instead
+        if (TLI.getOperationAction(ISD::CTPOP, VT) != TargetLowering::Legal &&
+            TLI.getOperationAction(ISD::CTLZ, VT) == TargetLowering::Legal) {
+          Result = LegalizeOp(DAG.getNode(ISD::SUB, VT, 
+                                        DAG.getConstant(getSizeInBits(VT), VT),
+                                        DAG.getNode(ISD::CTLZ, VT, Tmp3)));
+        } else {
+          Result = LegalizeOp(DAG.getNode(ISD::CTPOP, VT, Tmp3));
+        }
+        break;
+      }
        default:
          assert(0 && "Cannot expand this yet!");
          break;
@@ -1192,6 +1251,7 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
          }
          std::vector<std::pair<SDOperand, const Type*> > Args;
          Args.push_back(std::make_pair(Tmp1, T));
+        // FIXME: should use ExpandLibCall!
          std::pair<SDOperand,SDOperand> CallResult =
            TLI.LowerCallTo(DAG.getEntryNode(), T, false,
                            DAG.getExternalSymbol(FnName, VT), Args, DAG);
@@ -1226,7 +1286,6 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
            Node->getOpcode() == ISD::UINT_TO_FP) {
          Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP,
                                 Node->getValueType(0), Node->getOperand(0));
-        Result = LegalizeOp(Result);
          break;
        } else if (Node->getOpcode() == ISD::TRUNCATE) {
          // In the expand case, we must be dealing with a truncate, because
@@ -1342,9 +1401,9 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
    }
    }
  
-  if (!Op.Val->hasOneUse())
-    AddLegalizedOperand(Op, Result);
-
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
    return Result;
  }
  
@@ -1360,14 +1419,18 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
    assert(NVT > VT && MVT::isInteger(NVT) == MVT::isInteger(VT) &&
           "Cannot promote to smaller type!");
  
-  std::map<SDOperand, SDOperand>::iterator I = PromotedNodes.find(Op);
-  if (I != PromotedNodes.end()) return I->second;
-
    SDOperand Tmp1, Tmp2, Tmp3;
  
    SDOperand Result;
    SDNode *Node = Op.Val;
  
+  if (!Node->hasOneUse()) {
+    std::map<SDOperand, SDOperand>::iterator I = PromotedNodes.find(Op);
+    if (I != PromotedNodes.end()) return I->second;
+  } else {
+    assert(!PromotedNodes.count(Op) && "Repromoted this node??");
+  }
+
    // Promotion needs an optimization step to clean up after it, and is not
    // careful to avoid operations the target does not support.  Make sure that
    // all generated operations are legalized in the next iteration.
@@ -1485,8 +1548,6 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
      case Expand:
        Result = ExpandIntToFP(Node->getOpcode() == ISD::SINT_TO_FP, NVT,
                               Node->getOperand(0));
-      Result = LegalizeOp(Result);
-
        // Round if we cannot tolerate excess precision.
        if (NoExcessFPPrecision)
          Result = DAG.getNode(ISD::FP_ROUND_INREG, NVT, Result, VT);
@@ -1954,15 +2015,30 @@ static SDNode *FindAdjCallStackUp(SDNode *Node) {
    abort();
  }
  
+/// FindAdjCallStackDown - Given a chained node that is part of a call sequence,
+/// find the ADJCALLSTACKDOWN node that initiates the call sequence.
+static SDNode *FindAdjCallStackDown(SDNode *Node) {
+  assert(Node && "Didn't find adjcallstackdown for a call??");
+  if (Node->getOpcode() == ISD::ADJCALLSTACKDOWN) return Node;
+
+  assert(Node->getOperand(0).getValueType() == MVT::Other &&
+         "Node doesn't have a token chain argument!");
+  return FindAdjCallStackDown(Node->getOperand(0).Val);
+}
+
+
  /// FindInputOutputChains - If we are replacing an operation with a call we need
  /// to find the call that occurs before and the call that occurs after it to
-/// properly serialize the calls in the block.
+/// properly serialize the calls in the block.  The returned operand is the
+/// input chain value for the new call (e.g. the entry node or the previous
+/// call), and OutChain is set to be the chain node to update to point to the
+/// end of the call chain.
  static SDOperand FindInputOutputChains(SDNode *OpNode, SDNode *&OutChain,
                                         SDOperand Entry) {
    SDNode *LatestAdjCallStackDown = Entry.Val;
    SDNode *LatestAdjCallStackUp = 0;
    FindLatestAdjCallStackDown(OpNode, LatestAdjCallStackDown);
-  //std::cerr << "Found node: "; LatestAdjCallStackDown->dump(); std::cerr <<"\n";
+  //std::cerr<<"Found node: "; LatestAdjCallStackDown->dump(); std::cerr <<"\n";
  
    // It is possible that no ISD::ADJCALLSTACKDOWN was found because there is no
    // previous call in the function.  LatestCallStackDown may in that case be
@@ -1974,17 +2050,33 @@ static SDOperand FindInputOutputChains(SDNode *OpNode, SDNode *&OutChain,
      LatestAdjCallStackUp = Entry.Val;
    assert(LatestAdjCallStackUp && "NULL return from FindAdjCallStackUp");
  
-  SDNode *EarliestAdjCallStackUp = 0;
-  FindEarliestAdjCallStackUp(OpNode, EarliestAdjCallStackUp);
+  // Finally, find the first call that this must come before, first we find the
+  // adjcallstackup that ends the call.
+  OutChain = 0;
+  FindEarliestAdjCallStackUp(OpNode, OutChain);
  
-  if (EarliestAdjCallStackUp) {
-    //std::cerr << "Found node: ";
-    //EarliestAdjCallStackUp->dump(); std::cerr <<"\n";
-  }
+  // If we found one, translate from the adj up to the adjdown.
+  if (OutChain)
+    OutChain = FindAdjCallStackDown(OutChain);
  
    return SDOperand(LatestAdjCallStackUp, 0);
  }
  
+/// SpliceCallInto - Given the result chain of a libcall (CallResult), and a 
+void SelectionDAGLegalize::SpliceCallInto(const SDOperand &CallResult,
+                                          SDNode *OutChain) {
+  // Nothing to splice it into?
+  if (OutChain == 0) return;
+
+  assert(OutChain->getOperand(0).getValueType() == MVT::Other);
+  //OutChain->dump();
+
+  // Form a token factor node merging the old inval and the new inval.
+  SDOperand InToken = DAG.getNode(ISD::TokenFactor, MVT::Other, CallResult,
+                                  OutChain->getOperand(0));
+  // Change the node to refer to the new token.
+  OutChain->setAdjCallChain(InToken);
+}
  
  
  // ExpandLibCall - Expand a node into a call to a libcall.  If the result value
@@ -2007,21 +2099,23 @@ SDOperand SelectionDAGLegalize::ExpandLibCall(const char *Name, SDNode *Node,
    }
    SDOperand Callee = DAG.getExternalSymbol(Name, TLI.getPointerTy());
  
-  // We don't care about token chains for libcalls.  We just use the entry
-  // node as our input and ignore the output chain.  This allows us to place
-  // calls wherever we need them to satisfy data dependences.
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = MVT::getTypeForValueType(Node->getValueType(0));
-  SDOperand Result = TLI.LowerCallTo(InChain, RetTy, false, Callee,
-                                     Args, DAG).first;
-  switch (getTypeAction(Result.getValueType())) {
+  std::pair<SDOperand,SDOperand> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, false, Callee, Args, DAG);
+  SpliceCallInto(CallInfo.second, OutChain);
+
+  NeedsAnotherIteration = true;
+
+  switch (getTypeAction(CallInfo.first.getValueType())) {
    default: assert(0 && "Unknown thing");
    case Legal:
-    return Result;
+    return CallInfo.first;
    case Promote:
      assert(0 && "Cannot promote this yet!");
    case Expand:
      SDOperand Lo;
-    ExpandOp(Result, Lo, Hi);
+    ExpandOp(CallInfo.first, Lo, Hi);
      return Lo;
    }
  }
@@ -2036,19 +2130,7 @@ ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
           "This is not an expansion!");
    assert(Source.getValueType() == MVT::i64 && "Only handle expand from i64!");
  
-  SDNode *OutChain;
-  SDOperand InChain = FindInputOutputChains(Source.Val, OutChain,
-                                            DAG.getEntryNode());
-
-  const char *FnName = 0;
-  if (isSigned) {
-    if (DestTy == MVT::f32)
-      FnName = "__floatdisf";
-    else {
-      assert(DestTy == MVT::f64 && "Unknown fp value type!");
-      FnName = "__floatdidf";
-    }
-  } else {
+  if (!isSigned) {
      // If this is unsigned, and not supported, first perform the conversion to
      // signed, then adjust the result if the sign bit is set.
      SDOperand SignedConv = ExpandIntToFP(true, DestTy, Source);
@@ -2066,10 +2148,9 @@ ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
      SDOperand Zero = getIntPtrConstant(0), Four = getIntPtrConstant(4);
      SDOperand CstOffset = DAG.getNode(ISD::SELECT, Zero.getValueType(),
                                        SignSet, Four, Zero);
-    // FIXME: This is almost certainly broken for big-endian systems.  Should
-    // this just put the fudge factor in the low bits of the uint64 constant or?
-    static Constant *FudgeFactor =
-      ConstantUInt::get(Type::ULongTy, 0x5f800000ULL << 32);
+    uint64_t FF = 0x5f800000ULL;
+    if (TLI.isLittleEndian()) FF <<= 32;
+    static Constant *FudgeFactor = ConstantUInt::get(Type::ULongTy, FF);
  
      MachineConstantPool *CP = DAG.getMachineFunction().getConstantPool();
      SDOperand CPIdx = DAG.getConstantPool(CP->getConstantPoolIndex(FudgeFactor),
@@ -2086,17 +2167,41 @@ ExpandIntToFP(bool isSigned, MVT::ValueType DestTy, SDOperand Source) {
      }
      return DAG.getNode(ISD::ADD, DestTy, SignedConv, FudgeInReg);
    }
+
+  // Expand the source, then glue it back together for the call.  We must expand
+  // the source in case it is shared (this pass of legalize must traverse it).
+  SDOperand SrcLo, SrcHi;
+  ExpandOp(Source, SrcLo, SrcHi);
+  Source = DAG.getNode(ISD::BUILD_PAIR, Source.getValueType(), SrcLo, SrcHi);
+
+  SDNode *OutChain = 0;
+  SDOperand InChain = FindInputOutputChains(Source.Val, OutChain,
+                                            DAG.getEntryNode());
+  const char *FnName = 0;
+  if (DestTy == MVT::f32)
+    FnName = "__floatdisf";
+  else {
+    assert(DestTy == MVT::f64 && "Unknown fp value type!");
+    FnName = "__floatdidf";
+  }
+
    SDOperand Callee = DAG.getExternalSymbol(FnName, TLI.getPointerTy());
  
    TargetLowering::ArgListTy Args;
    const Type *ArgTy = MVT::getTypeForValueType(Source.getValueType());
+
    Args.push_back(std::make_pair(Source, ArgTy));
  
    // We don't care about token chains for libcalls.  We just use the entry
    // node as our input and ignore the output chain.  This allows us to place
    // calls wherever we need them to satisfy data dependences.
    const Type *RetTy = MVT::getTypeForValueType(DestTy);
-  return TLI.LowerCallTo(InChain, RetTy, false, Callee, Args, DAG).first;
+
+  std::pair<SDOperand,SDOperand> CallResult =
+    TLI.LowerCallTo(InChain, RetTy, false, Callee, Args, DAG);
+
+  SpliceCallInto(CallResult.second, OutChain);
+  return CallResult.first;
  }
  
  
@@ -2126,6 +2231,8 @@ void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){
        Hi = I->second.second;
        return;
      }
+  } else {
+    assert(!ExpandedNodes.count(Op) && "Re-expanding a node!");
    }
  
    // Expanding to multiple registers needs to perform an optimization step, and
@@ -2170,6 +2277,44 @@ void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){
      Hi = LegalizeOp(Node->getOperand(1));
      break;
  
+  case ISD::CTPOP:
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    Lo = DAG.getNode(ISD::ADD, NVT,          // ctpop(HL) -> ctpop(H)+ctpop(L)
+                     DAG.getNode(ISD::CTPOP, NVT, Lo),
+                     DAG.getNode(ISD::CTPOP, NVT, Hi));
+    Hi = DAG.getConstant(0, NVT);
+    break;
+
+  case ISD::CTLZ: {
+    // ctlz (HL) -> ctlz(H) != 32 ? ctlz(H) : (ctlz(L)+32)
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+    SDOperand HLZ = DAG.getNode(ISD::CTLZ, NVT, Hi);
+    SDOperand TopNotZero = DAG.getSetCC(ISD::SETNE, TLI.getSetCCResultTy(),
+                                        HLZ, BitsC);
+    SDOperand LowPart = DAG.getNode(ISD::CTLZ, NVT, Lo);
+    LowPart = DAG.getNode(ISD::ADD, NVT, LowPart, BitsC);
+
+    Lo = DAG.getNode(ISD::SELECT, NVT, TopNotZero, HLZ, LowPart);
+    Hi = DAG.getConstant(0, NVT);
+    break;
+  }
+
+  case ISD::CTTZ: {
+    // cttz (HL) -> cttz(L) != 32 ? cttz(L) : (cttz(H)+32)
+    ExpandOp(Node->getOperand(0), Lo, Hi);
+    SDOperand BitsC = DAG.getConstant(MVT::getSizeInBits(NVT), NVT);
+    SDOperand LTZ = DAG.getNode(ISD::CTTZ, NVT, Lo);
+    SDOperand BotNotZero = DAG.getSetCC(ISD::SETNE, TLI.getSetCCResultTy(),
+                                        LTZ, BitsC);
+    SDOperand HiPart = DAG.getNode(ISD::CTTZ, NVT, Hi);
+    HiPart = DAG.getNode(ISD::ADD, NVT, HiPart, BitsC);
+
+    Lo = DAG.getNode(ISD::SELECT, NVT, BotNotZero, LTZ, HiPart);
+    Hi = DAG.getConstant(0, NVT);
+    break;
+  }
+
    case ISD::LOAD: {
      SDOperand Ch = LegalizeOp(Node->getOperand(0));   // Legalize the chain.
      SDOperand Ptr = LegalizeOp(Node->getOperand(1));  // Legalize the pointer.