When checking for sret-demotion, it needs to use legal types. When using the return...

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index ec949fc27b3a7cb3ff1288fc9e18dc3dafbbab22..a3fb34576ac0730884e8fab0ab816f406d06e4de 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -70,13 +70,13 @@ LimitFPPrecision("limit-float-precision",
  
  namespace {
    /// RegsForValue - This struct represents the registers (physical or virtual)
-  /// that a particular set of values is assigned, and the type information about
-  /// the value. The most common situation is to represent one value at a time,
-  /// but struct or array values are handled element-wise as multiple values.
-  /// The splitting of aggregates is performed recursively, so that we never
-  /// have aggregate-typed registers. The values at this point do not necessarily
-  /// have legal types, so each value may require one or more registers of some
-  /// legal type.
+  /// that a particular set of values is assigned, and the type information
+  /// about the value. The most common situation is to represent one value at a
+  /// time, but struct or array values are handled element-wise as multiple
+  /// values.  The splitting of aggregates is performed recursively, so that we
+  /// never have aggregate-typed registers. The values at this point do not
+  /// necessarily have legal types, so each value may require one or more
+  /// registers of some legal type.
    ///
    struct RegsForValue {
      /// TLI - The TargetLowering object.
@@ -243,11 +243,13 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
        EVT IntermediateVT, RegisterVT;
        unsigned NumIntermediates;
        unsigned NumRegs =
-        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
-      assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+      assert(NumRegs == NumParts
+             && "Part count doesn't match vector breakdown!");
        NumParts = NumRegs; // Silence a compiler warning.
-      assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+      assert(RegisterVT == PartVT
+             && "Part type doesn't match vector breakdown!");
        assert(RegisterVT == Parts[0].getValueType() &&
               "Part type doesn't match part!");
  
@@ -260,8 +262,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
            Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1,
                                      PartVT, IntermediateVT);
        } else if (NumParts > 0) {
-        // If the intermediate type was expanded, build the intermediate operands
-        // from the parts.
+        // If the intermediate type was expanded, build the intermediate
+        // operands from the parts.
          assert(NumParts % NumIntermediates == 0 &&
                 "Must expand into a divisible number of parts!");
          unsigned Factor = NumParts / NumIntermediates;
@@ -270,8 +272,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
                                      PartVT, IntermediateVT);
        }
  
-      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
-      // operands.
+      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+      // intermediate operands.
        Val = DAG.getNode(IntermediateVT.isVector() ?
                          ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
                          ValueVT, &Ops[0], NumIntermediates);
@@ -462,7 +464,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
      // The number of parts is a power of 2.  Repeatedly bisect the value using
      // EXTRACT_ELEMENT.
      Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()),
+                           EVT::getIntegerVT(*DAG.getContext(),
+                                             ValueVT.getSizeInBits()),
                             Val);
  
      if (DisableScheduling)
@@ -797,18 +800,19 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
                               SDNodeOrder, Chain, NULL);
  }
  
-/// Get the EVTs and ArgFlags collections that represent the return type
-/// of the given function.  This does not require a DAG or a return value, and
-/// is suitable for use before any DAGs for the function are constructed.
+/// Get the EVTs and ArgFlags collections that represent the legalized return 
+/// type of the given function.  This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
  static void getReturnInfo(const Type* ReturnType,
                     Attributes attr, SmallVectorImpl<EVT> &OutVTs,
                     SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
                     TargetLowering &TLI,
                     SmallVectorImpl<uint64_t> *Offsets = 0) {
    SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
+  ComputeValueVTs(TLI, ReturnType, ValueVTs);
    unsigned NumValues = ValueVTs.size();
-  if ( NumValues == 0 ) return;
+  if (NumValues == 0) return;
+  unsigned Offset = 0;
  
    for (unsigned j = 0, f = NumValues; j != f; ++j) {
      EVT VT = ValueVTs[j];
@@ -831,6 +835,9 @@ static void getReturnInfo(const Type* ReturnType,
  
      unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
      EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+                        PartVT.getTypeForEVT(ReturnType->getContext()));
+
      // 'inreg' on function refers to return value
      ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
      if (attr & Attribute::InReg)
@@ -845,6 +852,11 @@ static void getReturnInfo(const Type* ReturnType,
      for (unsigned i = 0; i < NumParts; ++i) {
        OutVTs.push_back(PartVT);
        OutFlags.push_back(Flags);
+      if (Offsets)
+      {
+        Offsets->push_back(Offset);
+        Offset += PartSize;
+      }
      }
    }
  }
@@ -853,7 +865,7 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
    SDValue Chain = getControlRoot();
    SmallVector<ISD::OutputArg, 8> Outs;
    FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
-  
+
    if (!FLI.CanLowerReturn) {
      unsigned DemoteReg = FLI.DemoteRegister;
      const Function *F = I.getParent()->getParent();
@@ -862,12 +874,12 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
      // Leave Outs empty so that LowerReturn won't try to load return
      // registers the usual way.
      SmallVector<EVT, 1> PtrValueVTs;
-    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), 
+    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
                      PtrValueVTs);
  
      SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
      SDValue RetOp = getValue(I.getOperand(0));
-  
+
      SmallVector<EVT, 4> ValueVTs;
      SmallVector<uint64_t, 4> Offsets;
      ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
@@ -900,7 +912,7 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
        ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
        unsigned NumValues = ValueVTs.size();
        if (NumValues == 0) continue;
-  
+
        SDValue RetOp = getValue(I.getOperand(i));
        for (unsigned j = 0, f = NumValues; j != f; ++j) {
          EVT VT = ValueVTs[j];
@@ -913,8 +925,8 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
          else if (F->paramHasAttr(0, Attribute::ZExt))
            ExtendKind = ISD::ZERO_EXTEND;
  
-        // FIXME: C calling convention requires the return type to be promoted to
-        // at least 32-bit. But this is not necessary for non-C calling
+        // FIXME: C calling convention requires the return type to be promoted
+        // to at least 32-bit. But this is not necessary for non-C calling
          // conventions. The frontend should mark functions whose return values
          // require promoting with signext or zeroext attributes.
          if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
@@ -1192,6 +1204,18 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
      return false;
    }
  
+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].CC == Cases[1].CC &&
+      isa<Constant>(Cases[0].CmpRHS) &&
+      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }
+  
    return true;
  }
  
@@ -1730,7 +1754,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    if (Density < 0.4)
      return false;
  
-  DEBUG(errs() << "Lowering jump table\n"
+  DEBUG(dbgs() << "Lowering jump table\n"
                 << "First entry: " << First << ". Last entry: " << Last << '\n'
                 << "Range: " << Range
                 << "Size: " << TSize << ". Density: " << Density << "\n\n");
@@ -1834,7 +1858,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
  
    APInt LSize = FrontCase.size();
    APInt RSize = TSize-LSize;
-  DEBUG(errs() << "Selecting best pivot: \n"
+  DEBUG(dbgs() << "Selecting best pivot: \n"
                 << "First: " << First << ", Last: " << Last <<'\n'
                 << "LSize: " << LSize << ", RSize: " << RSize << '\n');
    for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
@@ -1844,13 +1868,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
      APInt Range = ComputeRange(LEnd, RBegin);
      assert((Range - 2ULL).isNonNegative() &&
             "Invalid case distance");
-    double LDensity = (double)LSize.roundToDouble() / 
+    double LDensity = (double)LSize.roundToDouble() /
                             (LEnd - First + 1ULL).roundToDouble();
      double RDensity = (double)RSize.roundToDouble() /
                             (Last - RBegin + 1ULL).roundToDouble();
      double Metric = Range.logBase2()*(LDensity+RDensity);
      // Should always split in some non-trivial place
-    DEBUG(errs() <<"=>Step\n"
+    DEBUG(dbgs() <<"=>Step\n"
                   << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
                   << "LDensity: " << LDensity
                   << ", RDensity: " << RDensity << '\n'
@@ -1858,7 +1882,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
      if (FMetric < Metric) {
        Pivot = J;
        FMetric = Metric;
-      DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
      }
  
      LSize += J->size();
@@ -1962,15 +1986,16 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
        // Don't bother the code below, if there are too much unique destinations
        return false;
    }
-  DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
-               << "Total number of comparisons: " << numCmps << '\n');
+  DEBUG(dbgs() << "Total number of unique destinations: "
+        << Dests.size() << '\n'
+        << "Total number of comparisons: " << numCmps << '\n');
  
    // Compute span of values.
    const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
    const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
    APInt cmpRange = maxValue - minValue;
  
-  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
                 << "Low bound: " << minValue << '\n'
                 << "High bound: " << maxValue << '\n');
  
@@ -1980,7 +2005,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
         !(Dests.size() >= 3 && numCmps >= 6)))
      return false;
  
-  DEBUG(errs() << "Emitting bit tests\n");
+  DEBUG(dbgs() << "Emitting bit tests\n");
    APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
  
    // Optimize the case where all the case values fit in a
@@ -2030,9 +2055,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
  
    const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
  
-  DEBUG(errs() << "Cases:\n");
+  DEBUG(dbgs() << "Cases:\n");
    for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
-    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
                   << ", Bits: " << CasesBits[i].Bits
                   << ", BB: " << CasesBits[i].BB << '\n');
  
@@ -2131,7 +2156,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
    // create a binary search tree from them.
    CaseVector Cases;
    size_t numCmps = Clusterify(Cases, SI);
-  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
                 << ". Total compares: " << numCmps << '\n');
    numCmps = 0;
  
@@ -2196,7 +2221,7 @@ void SelectionDAGBuilder::visitFSub(User &I) {
        if (CV == CNZ) {
          SDValue Op2 = getValue(I.getOperand(1));
          SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                                  Op2.getValueType(), Op2); 
+                                  Op2.getValueType(), Op2);
          setValue(&I, Res);
  
          if (DisableScheduling)
@@ -2284,7 +2309,7 @@ void SelectionDAGBuilder::visitICmp(User &I) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
    ISD::CondCode Opcode = getICmpCondCode(predicate);
-  
+
    EVT DestVT = TLI.getValueType(I.getType());
    SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode);
    setValue(&I, Res);
@@ -2539,7 +2564,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
    // Convert the ConstantVector mask operand into an array of ints, with -1
    // representing undef values.
    SmallVector<Constant*, 8> MaskElts;
-  cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), 
+  cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(),
                                                       MaskElts);
    unsigned MaskNumElts = MaskElts.size();
    for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2548,7 +2573,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
      else
        Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
    }
-  
+
    EVT VT = TLI.getValueType(I.getType());
    EVT SrcVT = Src1.getValueType();
    unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2591,12 +2616,12 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
      SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
      MOps1[0] = Src1;
      MOps2[0] = Src2;
-    
-    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 
-                                                  getCurDebugLoc(), VT, 
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
                                                    &MOps1[0], NumConcat);
      Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurDebugLoc(), VT, 
+                                                  getCurDebugLoc(), VT,
                                                    &MOps2[0], NumConcat);
  
      // Readjust mask for new input vector length.
@@ -2609,7 +2634,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
          MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
      }
  
-    SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 
+    SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
                                         &MappedOps[0]);
      setValue(&I, Res);
  
@@ -2634,7 +2659,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
        int Input = 0;
        if (Idx < 0)
          continue;
-      
+
        if (Idx >= (int)SrcNumElts) {
          Input = 1;
          Idx -= SrcNumElts;
@@ -2647,7 +2672,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) {
  
      // Check if the access is smaller than the vector size and can we find
      // a reasonable extract index.
-    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not Extract.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not
+                                 // Extract.
      int StartIdx[2];  // StartIdx to extract from
      for (int Input=0; Input < 2; ++Input) {
        if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
@@ -2927,20 +2953,20 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
               I.getAlignment());
  
    SDValue AllocSize = getValue(I.getArraySize());
-  
+
    AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
                            AllocSize,
                            DAG.getConstant(TySize, AllocSize.getValueType()));
-  
+
    if (DisableScheduling)
      DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder);
-  
+
    EVT IntPtr = TLI.getPointerTy();
    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
  
    if (DisableScheduling)
      DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder);
-  
+
    // Handle alignment.  If the requested alignment is less than or equal to
    // the stack alignment, ignore it.  If the size is greater than or equal to
    // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
@@ -3152,7 +3178,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
    } else if (!HasChain) {
      Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
                           VTs, &Ops[0], Ops.size());
-  } else if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+  } else if (!I.getType()->isVoidTy()) {
      Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
                           VTs, &Ops[0], Ops.size());
    } else {
@@ -3171,7 +3197,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
        DAG.setRoot(Chain);
    }
  
-  if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+  if (!I.getType()->isVoidTy()) {
      if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
        EVT VT = TLI.getValueType(PTy);
        Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
@@ -4261,6 +4287,60 @@ SelectionDAGBuilder::visitPow(CallInst &I) {
    setValue(&I, result);
  }
  
+
+/// ExpandPowI - Lower llvm.powi(LHS, RHS) to an FMUL tree or an FPOWI node.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+                          SelectionDAG &DAG) {
+  // If RHS is a constant, we can expand this out to a multiplication tree;
+  // otherwise we emit FPOWI, which ends up as a call to __powidf2 (for
+  // example).  When optimizing for size, only expand if that takes a small
+  // number of multiplies; when not optimizing for size, always expand.
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+    // Get the magnitude of the exponent; the sign is handled after the loop.
+    unsigned Val = RHSC->getSExtValue();
+    if ((int)Val < 0) Val = -Val;
+
+    // powi(x, 0) -> 1.0
+    if (Val == 0)
+      return DAG.getConstantFP(1.0, LHS.getValueType());
+
+    Function *F = DAG.getMachineFunction().getFunction();
+    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+        // If optimizing for size, don't insert too many multiplies.  This
+        // allows up to 5 multiplies: (popcount-1) products + log2 squarings.
+        CountPopulation_32(Val)+Log2_32(Val) < 7) {
+      // We use the simple binary decomposition method to generate the multiply
+      // sequence.  There are more optimal ways to do this (for example,
+      // powi(x,15) generates one more multiply than it should), but this has
+      // the benefit of being both really simple and much better than a libcall.
+      SDValue Res;  // Logically starts equal to 1.0
+      SDValue CurSquare = LHS;
+      while (Val) {
+        if (Val & 1) {
+          if (Res.getNode())
+            Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+          else
+            Res = CurSquare;  // 1.0*CurSquare.
+        }
+
+        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+                                CurSquare, CurSquare);
+        Val >>= 1;
+      }
+
+      // If the exponent was negative, invert the result, producing 1/(x*x*x).
+      if (RHSC->getSExtValue() < 0)
+        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+      return Res;
+    }
+  }
+
+  // Otherwise, emit an FPOWI node (lowered later, e.g. to a libcall).
+  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+
  /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
  /// we want to emit this as a call to a named external function, return the name
  /// otherwise lower it and return null.
@@ -4347,21 +4427,15 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
        DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
      return 0;
    }
-  case Intrinsic::dbg_stoppoint: 
-  case Intrinsic::dbg_region_start:
-  case Intrinsic::dbg_region_end:
-  case Intrinsic::dbg_func_start:
-    // FIXME - Remove this instructions once the dust settles.
-    return 0;
    case Intrinsic::dbg_declare: {
-    if (OptLevel != CodeGenOpt::None) 
+    if (OptLevel != CodeGenOpt::None)
        // FIXME: Variable debug info is not supported here.
        return 0;
      DwarfWriter *DW = DAG.getDwarfWriter();
      if (!DW)
        return 0;
      DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
-    if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None))
+    if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
        return 0;
  
      MDNode *Variable = DI.getVariable();
@@ -4374,7 +4448,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
        return 0;
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
-    if (SI == FuncInfo.StaticAllocaMap.end()) 
+    if (SI == FuncInfo.StaticAllocaMap.end())
        return 0; // VLAs.
      int FI = SI->second;
  
@@ -4536,10 +4610,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
        DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
      return 0;
    case Intrinsic::powi:
-    Res = DAG.getNode(ISD::FPOWI, dl,
-                      getValue(I.getOperand(1)).getValueType(),
-                      getValue(I.getOperand(1)),
-                      getValue(I.getOperand(2)));
+    Res = ExpandPowI(dl, getValue(I.getOperand(1)), getValue(I.getOperand(2)),
+                     DAG);
      setValue(&I, Res);
      if (DisableScheduling)
        DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
@@ -4925,10 +4997,10 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
    SmallVector<EVT, 4> OutVTs;
    SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
    SmallVector<uint64_t, 4> Offsets;
-  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), 
+  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
                  OutVTs, OutsFlags, TLI, &Offsets);
  
-  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), 
+  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
                          FTy->isVarArg(), OutVTs, OutsFlags, DAG);
  
    SDValue DemoteStackSlot;
@@ -5035,16 +5107,37 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
                                  MVT::Other, &Chains[0], NumValues);
      PendingLoads.push_back(Chain);
+    
+    // Collect the legal value parts into potentially illegal values
+    // that correspond to the original function's return values.
+    SmallVector<EVT, 4> RetTys;
+    RetTy = FTy->getReturnType();
+    ComputeValueVTs(TLI, RetTy, RetTys);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SmallVector<SDValue, 4> ReturnValues;
+    unsigned CurReg = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      EVT VT = RetTys[I];
+      EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
+      unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
+  
+      SDValue ReturnValue =
+        getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs,
+                         RegisterVT, VT, AssertOp);
+      ReturnValues.push_back(ReturnValue);
+      if (DisableScheduling)
+        DAG.AssignOrdering(ReturnValue.getNode(), SDNodeOrder);
+      CurReg += NumRegs;
+    }
+    SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                              DAG.getVTList(&RetTys[0], RetTys.size()),
+                              &ReturnValues[0], ReturnValues.size());
  
-    SDValue MV = DAG.getNode(ISD::MERGE_VALUES,
-                             getCurDebugLoc(),
-                             DAG.getVTList(&OutVTs[0], NumValues),
-                             &Values[0], NumValues);
-    setValue(CS.getInstruction(), MV);
+    setValue(CS.getInstruction(), Res);
  
      if (DisableScheduling) {
        DAG.AssignOrdering(Chain.getNode(), SDNodeOrder);
-      DAG.AssignOrdering(MV.getNode(), SDNodeOrder);
+      DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
      }
    }
  
@@ -5088,23 +5181,23 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
  
  static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
                               SelectionDAGBuilder &Builder) {
-  
+
    // Check to see if this load can be trivially constant folded, e.g. if the
    // input is from a string literal.
    if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
      // Cast pointer to the type we really want to load.
      LoadInput = ConstantExpr::getBitCast(LoadInput,
                                           PointerType::getUnqual(LoadTy));
-    
+
      if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD))
        return Builder.getValue(LoadCst);
    }
-  
+
    // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
    // still constant memory, the input chain can be the entry node.
    SDValue Root;
    bool ConstantMemory = false;
-  
+
    // Do not serialize (non-volatile) loads of constant memory with anything.
    if (Builder.AA->pointsToConstantMemory(PtrVal)) {
      Root = Builder.DAG.getEntryNode();
@@ -5113,12 +5206,12 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
      // Do not serialize non-volatile loads against each other.
      Root = Builder.DAG.getRoot();
    }
-  
+
    SDValue Ptr = Builder.getValue(PtrVal);
    SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
                                          Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
                                          false /*volatile*/, 1 /* align=1 */);
-  
+
    if (!ConstantMemory)
      Builder.PendingLoads.push_back(LoadVal.getValue(1));
    return LoadVal;
@@ -5132,15 +5225,15 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
    // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
    if (I.getNumOperands() != 4)
      return false;
-  
+
    Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
    if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
        !isa<IntegerType>(I.getOperand(3)->getType()) ||
        !isa<IntegerType>(I.getType()))
-    return false;    
-  
+    return false;
+
    ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
-  
+
    // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
    // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
    if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
@@ -5159,25 +5252,25 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
        break;
      case 4:
        LoadVT = MVT::i32;
-      LoadTy = Type::getInt32Ty(Size->getContext()); 
+      LoadTy = Type::getInt32Ty(Size->getContext());
        break;
      case 8:
        LoadVT = MVT::i64;
-      LoadTy = Type::getInt64Ty(Size->getContext()); 
+      LoadTy = Type::getInt64Ty(Size->getContext());
        break;
          /*
      case 16:
        LoadVT = MVT::v4i32;
-      LoadTy = Type::getInt32Ty(Size->getContext()); 
+      LoadTy = Type::getInt32Ty(Size->getContext());
        LoadTy = VectorType::get(LoadTy, 4);
        break;
           */
      }
-    
+
      // This turns into unaligned loads.  We only do this if the target natively
      // supports the MVT we'll be loading or if it is small enough (<= 4) that
      // we'll only produce a small number of byte loads.
-    
+
      // Require that we can find a legal MVT, and only do this if the target
      // supports unaligned loads of that type.  Expanding into byte loads would
      // bloat the code.
@@ -5187,11 +5280,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
        if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
          ActuallyDoIt = false;
      }
-    
+
      if (ActuallyDoIt) {
        SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
        SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
-      
+
        SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
                                   ISD::SETNE);
        EVT CallVT = TLI.getValueType(I.getType(), true);
@@ -5199,8 +5292,8 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
        return true;
      }
    }
-  
-  
+
+
    return false;
  }
  
@@ -5570,7 +5663,7 @@ public:
    /// getCallOperandValEVT - Return the EVT of the Value* that this operand
    /// corresponds to.  If there is no Value* for this operand, it returns
    /// MVT::Other.
-  EVT getCallOperandValEVT(LLVMContext &Context, 
+  EVT getCallOperandValEVT(LLVMContext &Context,
                             const TargetLowering &TLI,
                             const TargetData *TD) const {
      if (CallOperandVal == 0) return MVT::Other;
@@ -5691,7 +5784,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
          // bitcast to the corresponding integer type.  This turns an f64 value
          // into i64, which can be passed with two i32 values on a 32-bit
          // machine.
-        RegVT = EVT::getIntegerVT(Context, 
+        RegVT = EVT::getIntegerVT(Context,
                                    OpInfo.ConstraintVT.getSizeInBits());
          OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
                                           RegVT, OpInfo.CallOperand);
@@ -5758,7 +5851,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
      return;
    }
-  
+
    // This is a reference to a register class that doesn't directly correspond
    // to an LLVM register class.  Allocate NumRegs consecutive, available,
    // registers from the class.
@@ -5820,7 +5913,7 @@ hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
        if (CType == TargetLowering::C_Memory)
          return true;
      }
-    
+
      // Indirect operand accesses access memory.
      if (CI.isIndirect)
        return true;
@@ -5845,9 +5938,9 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
      ConstraintInfos = IA->ParseConstraints();
  
    bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-  
+
    SDValue Chain, Flag;
-  
+
    // We won't need to flush pending loads if this asm doesn't touch
    // memory and is nonvolatile.
    if (hasMemory || IA->hasSideEffects())
@@ -5874,7 +5967,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
  
        // The return value of the call is this value.  As such, there is no
        // corresponding argument.
-      assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+      assert(!CS.getType()->isVoidTy() &&
               "Bad inline asm!");
        if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
          OpVT = TLI.getValueType(STy->getElementType(ResNo));
@@ -5999,7 +6092,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
    std::vector<SDValue> AsmNodeOperands;
    AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
    AsmNodeOperands.push_back(
-          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+                                      TLI.getPointerTy()));
  
  
    // Loop over all of the inputs, copying the operand values into the
@@ -6043,8 +6137,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
                                                        OpInfo.CallOperandVal));
        } else {
          // This is the result value of the call.
-        assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
-               "Bad inline asm!");
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
          // Concatenate this output onto the outputs list.
          RetValRegs.append(OpInfo.AssignedRegs);
        }
@@ -6099,8 +6192,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
            MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
            for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
                 i != e; ++i)
-            MatchedRegs.Regs.
-              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+            MatchedRegs.Regs.push_back
+              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
  
            // Use the produced MatchedRegs object to
            MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
@@ -6509,11 +6602,11 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
    // Check whether the function can return without sret-demotion.
    SmallVector<EVT, 4> OutVTs;
    SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
-  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), 
+  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  OutVTs, OutsFlags, TLI);
    FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
  
-  FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), 
+  FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
                                            OutVTs, OutsFlags, DAG);
    if (!FLI.CanLowerReturn) {
      // Put in an sret pointer parameter before all the other parameters.
@@ -6624,7 +6717,8 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
      MachineRegisterInfo& RegInfo = MF.getRegInfo();
      unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
      FLI.DemoteRegister = SRetReg;
-    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue);
+    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+                                    SRetReg, ArgValue);
      DAG.setRoot(NewRoot);
  
      // i indexes lowered arguments.  Bump it past the hidden sret argument.