[WinEH] Recognize CoreCLR personality function

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 2c3c0eb101a0c925bb612073bd5e5c28eff985eb..fc140f655716cf7427a4f44520a3b10db5fd5b45 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -22,6 +22,7 @@
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
  #include "llvm/CodeGen/FastISel.h"
  #include "llvm/CodeGen/FunctionLoweringInfo.h"
  #include "llvm/CodeGen/GCMetadata.h"
@@ -79,7 +80,7 @@ LimitFPPrecision("limit-float-precision",
                   cl::init(0));
  
  static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
                  cl::desc("Enable fast-math-flags for DAG nodes"));
  
  // Limit the width of DAG chains. This is important in general to prevent
@@ -319,9 +320,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
      assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
        "Cannot handle this kind of promotion");
      // Promoted vector extract
-    bool Smaller = ValueVT.bitsLE(PartEVT);
-    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
-                       DL, ValueVT, Val);
+    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  
    }
  
@@ -339,11 +338,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
    }
  
    if (ValueVT.getVectorNumElements() == 1 &&
-      ValueVT.getVectorElementType() != PartEVT) {
-    bool Smaller = ValueVT.bitsLE(PartEVT);
-    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
-                       DL, ValueVT.getScalarType(), Val);
-  }
+      ValueVT.getVectorElementType() != PartEVT)
+    Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
  
    return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
  }
@@ -520,9 +516,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
                 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
  
        // Promoted vector extract
-      bool Smaller = PartEVT.bitsLE(ValueVT);
-      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
-                        DL, PartVT, Val);
+      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      } else{
        // Vector -> scalar conversion.
        assert(ValueVT.getVectorNumElements() == 1 &&
@@ -531,9 +525,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  
-      bool Smaller = ValueVT.bitsLE(PartVT);
-      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
-                         DL, PartVT, Val);
+      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
  
      Parts[0] = Val;
@@ -595,8 +587,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                             const DataLayout &DL, unsigned Reg, Type *Ty) {
    ComputeValueVTs(TLI, DL, Ty, ValueVTs);
  
-  for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
+  for (EVT ValueVT : ValueVTs) {
      unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
      MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
      for (unsigned i = 0; i != NumRegs; ++i)
@@ -1168,6 +1159,109 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
    llvm_unreachable("Can't get register for value!");
  }
  
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+  llvm_unreachable("should never codegen catchpads");
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+  // Update machine-CFG edge.
+  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+  FuncInfo.MBB->addSuccessor(TargetMBB);
+
+  // Figure out the funclet membership for the catchret's successor.
+  // This will be used by the FuncletLayout pass to determine how to order the
+  // BB's.
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  WinEHFuncInfo &EHInfo =
+      MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction());
+  const BasicBlock *SuccessorColor = EHInfo.CatchRetSuccessorColorMap[&I];
+  assert(SuccessorColor && "No parent funclet for catchret!");
+  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+  // Create the terminator node.
+  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
+                            DAG.getBasicBlock(SuccessorColorMBB));
+  DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchEndPad(const CatchEndPadInst &I) {
+  llvm_unreachable("should never codegen catchendpads");
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+  // Don't emit any special code for the cleanuppad instruction. It just marks
+  // the start of a funclet.
+  FuncInfo.MBB->setIsEHFuncletEntry();
+  FuncInfo.MBB->setIsCleanupFuncletEntry();
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchpad,
+/// terminatepad, or catchendpad instructions, and finds all the "real" machine
+/// basic block destinations.
+static void
+findUnwindDestinations(FunctionLoweringInfo &FuncInfo,
+                       const BasicBlock *EHPadBB,
+                       SmallVectorImpl<MachineBasicBlock *> &UnwindDests) {
+  EHPersonality Personality =
+    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+  while (EHPadBB) {
+    const Instruction *Pad = EHPadBB->getFirstNonPHI();
+    if (isa<LandingPadInst>(Pad)) {
+      // Stop on landingpads. They are not funclets.
+      UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]);
+      break;
+    } else if (isa<CleanupPadInst>(Pad)) {
+      // Stop on cleanup pads. Cleanups are always funclet entries for all known
+      // personalities.
+      UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]);
+      UnwindDests.back()->setIsEHFuncletEntry();
+      break;
+    } else if (const auto *CPI = dyn_cast<CatchPadInst>(Pad)) {
+      // Add the catchpad handler to the possible destinations.
+      UnwindDests.push_back(FuncInfo.MBBMap[CPI->getNormalDest()]);
+      // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
+      if (IsMSVCCXX || IsCoreCLR)
+        UnwindDests.back()->setIsEHFuncletEntry();
+      EHPadBB = CPI->getUnwindDest();
+    } else if (const auto *CEPI = dyn_cast<CatchEndPadInst>(Pad)) {
+      EHPadBB = CEPI->getUnwindDest();
+    } else if (const auto *CEPI = dyn_cast<CleanupEndPadInst>(Pad)) {
+      EHPadBB = CEPI->getUnwindDest();
+    }
+  }
+}
+
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+  // Update successor info.
+  // FIXME: The weights for catchpads will be wrong.
+  SmallVector<MachineBasicBlock *, 1> UnwindDests;
+  findUnwindDestinations(FuncInfo, I.getUnwindDest(), UnwindDests);
+  for (MachineBasicBlock *UnwindDest : UnwindDests) {
+    UnwindDest->setIsEHPad();
+    addSuccessorWithWeight(FuncInfo.MBB, UnwindDest);
+  }
+
+  // Create the terminator node.
+  SDValue Ret =
+      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+  DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupEndPad(const CleanupEndPadInst &I) {
+  report_fatal_error("visitCleanupEndPad not yet implemented!");
+}
+
+void SelectionDAGBuilder::visitTerminatePad(const TerminatePadInst &TPI) {
+  report_fatal_error("visitTerminatePad not yet implemented!");
+}
+
  void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    auto &DL = DAG.getDataLayout();
@@ -1385,13 +1479,11 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
        ISD::CondCode Condition;
        if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
          Condition = getICmpCondCode(IC->getPredicate());
-      } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+      } else {
+        const FCmpInst *FC = cast<FCmpInst>(Cond);
          Condition = getFCmpCondCode(FC->getPredicate());
          if (TM.Options.NoNaNsFPMath)
            Condition = getFCmpCodeWithoutNaN(Condition);
-      } else {
-        (void)Condition; // silence warning.
-        llvm_unreachable("Unknown compare instruction");
        }
  
        CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
@@ -1421,7 +1513,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                                 MachineBasicBlock *FBB,
                                                 MachineBasicBlock *CurBB,
                                                 MachineBasicBlock *SwitchBB,
-                                               unsigned Opc, uint32_t TWeight,
+                                               Instruction::BinaryOps Opc,
+                                               uint32_t TWeight,
                                                 uint32_t FWeight) {
    // If this node is not part of the or/and tree, emit it as a branch.
    const Instruction *BOp = dyn_cast<Instruction>(Cond);
@@ -1585,11 +1678,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
    //     jle foo
    //
    if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
-    if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
-        BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
-                             BOp->getOpcode() == Instruction::Or)) {
+    Instruction::BinaryOps Opcode = BOp->getOpcode();
+    if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+        !I.getMetadata(LLVMContext::MD_unpredictable) &&
+        (Opcode == Instruction::And || Opcode == Instruction::Or)) {
        FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
-                           BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
+                           Opcode, getEdgeWeight(BrMBB, Succ0MBB),
                             getEdgeWeight(BrMBB, Succ1MBB));
        // If the compares in later blocks need to use values not currently
        // exported from this block, export them now.  This block should always
@@ -1797,10 +1891,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                          GuardPtr, MachinePointerInfo(IRGuard, 0),
                          true, false, false, Align);
  
-  SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
-                                  StackSlotPtr,
-                                  MachinePointerInfo::getFixedStack(FI),
-                                  true, false, false, Align);
+  SDValue StackSlot = DAG.getLoad(
+      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
+      false, false, Align);
  
    // Perform the comparison via a subtract/getsetcc.
    EVT VT = Guard.getValueType();
@@ -1884,8 +1978,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
  
    MachineBasicBlock* MBB = B.Cases[0].ThisBB;
  
-  addSuccessorWithWeight(SwitchBB, B.Default);
-  addSuccessorWithWeight(SwitchBB, MBB);
+  addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight);
+  addSuccessorWithWeight(SwitchBB, MBB, B.Weight);
  
    SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
                                  MVT::Other, CopyTo, RangeCmp,
@@ -1958,9 +2052,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
  void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
    MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
  
-  // Retrieve successors.
+  // Retrieve successors. Look through artificial IR level blocks like catchpads
+  // and catchendpads for successors.
    MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
-  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+  const BasicBlock *EHPadBB = I.getSuccessor(1);
  
    const Value *Callee(I.getCalledValue());
    const Function *Fn = dyn_cast<Function>(Callee);
@@ -1975,14 +2070,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
        break;
      case Intrinsic::experimental_patchpoint_void:
      case Intrinsic::experimental_patchpoint_i64:
-      visitPatchpoint(&I, LandingPad);
+      visitPatchpoint(&I, EHPadBB);
        break;
      case Intrinsic::experimental_gc_statepoint:
-      LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+      LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
        break;
      }
    } else
-    LowerCallTo(&I, getValue(Callee), false, LandingPad);
+    LowerCallTo(&I, getValue(Callee), false, EHPadBB);
  
    // If the value of the invoke is used outside of its defining block, make it
    // available as a virtual register.
@@ -1992,9 +2087,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
      CopyToExportRegsIfNeeded(&I);
    }
  
-  // Update successor info
+  SmallVector<MachineBasicBlock *, 1> UnwindDests;
+  findUnwindDestinations(FuncInfo, EHPadBB, UnwindDests);
+
+  // Update successor info.
+  // FIXME: The weights for catchpads will be wrong.
    addSuccessorWithWeight(InvokeMBB, Return);
-  addSuccessorWithWeight(InvokeMBB, LandingPad);
+  for (MachineBasicBlock *UnwindDest : UnwindDests) {
+    UnwindDest->setIsEHPad();
+    addSuccessorWithWeight(InvokeMBB, UnwindDest);
+  }
  
    // Drop into normal successor.
    DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
@@ -2007,7 +2109,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
  }
  
  void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
-  assert(FuncInfo.MBB->isLandingPad() &&
+  assert(FuncInfo.MBB->isEHPad() &&
           "Call to landingpad not in landing pad!");
  
    MachineBasicBlock *MBB = FuncInfo.MBB;
@@ -2119,7 +2221,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
  
  void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
    if (DAG.getTarget().Options.TrapUnreachable)
-    DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+    DAG.setRoot(
+        DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
  }
  
  void SelectionDAGBuilder::visitFSub(const User &I) {
@@ -2260,6 +2363,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
    ISD::CondCode Condition = getFCmpCondCode(predicate);
+  
+  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+  // further optimization, but currently FMF is only applicable to binary nodes.
    if (TM.Options.NoNaNsFPMath)
      Condition = getFCmpCodeWithoutNaN(Condition);
    EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
@@ -2284,23 +2391,45 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
  
    // Min/max matching is only viable if all output VTs are the same.
    if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
-    Value *LHS, *RHS;
-    SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
-    ISD::NodeType Opc = ISD::DELETED_NODE;
-    switch (SPF) {
-    case SPF_UMAX: Opc = ISD::UMAX; break;
-    case SPF_UMIN: Opc = ISD::UMIN; break;
-    case SPF_SMAX: Opc = ISD::SMAX; break;
-    case SPF_SMIN: Opc = ISD::SMIN; break;
-    default: break;
-    }
-
      EVT VT = ValueVTs[0];
      LLVMContext &Ctx = *DAG.getContext();
      auto &TLI = DAG.getTargetLoweringInfo();
      while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector)
        VT = TLI.getTypeToTransformTo(Ctx, VT);
  
+    Value *LHS, *RHS;
+    auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+    ISD::NodeType Opc = ISD::DELETED_NODE;
+    switch (SPR.Flavor) {
+    case SPF_UMAX:    Opc = ISD::UMAX; break;
+    case SPF_UMIN:    Opc = ISD::UMIN; break;
+    case SPF_SMAX:    Opc = ISD::SMAX; break;
+    case SPF_SMIN:    Opc = ISD::SMIN; break;
+    case SPF_FMINNUM:
+      switch (SPR.NaNBehavior) {
+      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+      case SPNB_RETURNS_NAN:   Opc = ISD::FMINNAN; break;
+      case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+      case SPNB_RETURNS_ANY:
+        Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM
+          : ISD::FMINNAN;
+        break;
+      }
+      break;
+    case SPF_FMAXNUM:
+      switch (SPR.NaNBehavior) {
+      case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+      case SPNB_RETURNS_NAN:   Opc = ISD::FMAXNAN; break;
+      case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+      case SPNB_RETURNS_ANY:
+        Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM
+          : ISD::FMAXNAN;
+        break;
+      }
+      break;
+    default: break;
+    }
+
      if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
          // If the underlying comparison instruction is used by any other instruction,
          // the consumed instructions won't be destroyed, so it is not profitable
@@ -2920,7 +3049,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
    // throughout the function's lifetime.
  
    bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
-    isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout());
+                     isDereferenceablePointer(SV, DAG.getDataLayout());
    unsigned Alignment = I.getAlignment();
  
    AAMDNodes AAInfo;
@@ -2940,8 +3069,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
    if (isVolatile || NumValues > MaxParallelChains)
      // Serialize volatile loads with other side effects.
      Root = getRoot();
-  else if (AA->pointsToConstantMemory(
-               MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
+  else if (AA->pointsToConstantMemory(MemoryLocation(
+               SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
      // Do not serialize (non-volatile) loads of constant memory with anything.
      Root = DAG.getEntryNode();
      ConstantMemory = true;
@@ -3056,7 +3185,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
  void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
    SDLoc sdl = getCurSDLoc();
  
-  // llvm.masked.store.*(Src0, Ptr, alignemt, Mask)
+  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
    Value  *PtrOperand = I.getArgOperand(1);
    SDValue Ptr = getValue(PtrOperand);
    SDValue Src0 = getValue(I.getArgOperand(0));
@@ -3080,56 +3209,63 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
    setValue(&I, StoreNode);
  }
  
-// Gather/scatter receive a vector of pointers.
-// This vector of pointers may be represented as a base pointer + vector of 
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+//   %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+//  or
+//   %gep.ptr = getelementptr i32, i32* %ptr,        <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer - it is the uniform base we
+// are looking for. If first operand of the GEP is a splat vector - we
+// extract the spalt value and use it as a uniform base.
+// In all other cases the function returns 'false'.
+//
  static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
                             SelectionDAGBuilder* SDB) {
  
-  assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
-  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!Gep || Gep->getNumOperands() > 2)
+  SelectionDAG& DAG = SDB->DAG;
+  LLVMContext &Context = *DAG.getContext();
+
+  assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP || GEP->getNumOperands() > 2)
      return false;
-  ShuffleVectorInst *ShuffleInst = 
-    dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
-  if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
-      cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
-      Instruction::InsertElement)
+
+  Value *GEPPtr = GEP->getPointerOperand();
+  if (!GEPPtr->getType()->isVectorTy())
+    Ptr = GEPPtr;
+  else if (!(Ptr = getSplatValue(GEPPtr)))
      return false;
  
-  Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1);
+  Value *IndexVal = GEP->getOperand(1);
  
-  SelectionDAG& DAG = SDB->DAG;
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  // Check is the Ptr is inside current basic block
-  // If not, look for the shuffle instruction
-  if (SDB->findValue(Ptr))
-    Base = SDB->getValue(Ptr);
-  else if (SDB->findValue(ShuffleInst)) {
-    SDValue ShuffleNode = SDB->getValue(ShuffleInst);
-    SDLoc sdl = ShuffleNode;
-    Base = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, sdl,
-        ShuffleNode.getValueType().getScalarType(), ShuffleNode,
-        DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-    SDB->setValue(Ptr, Base);
-  }
-  else
+  // The operands of the GEP may be defined in another basic block.
+  // In this case we'll not find nodes for the operands.
+  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
      return false;
  
-  Value *IndexVal = Gep->getOperand(1);
-  if (SDB->findValue(IndexVal)) {
-    Index = SDB->getValue(IndexVal);
+  Base = SDB->getValue(Ptr);
+  Index = SDB->getValue(IndexVal);
  
-    if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+  // Suppress sign extension.
+  if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+    if (SDB->findValue(Sext->getOperand(0))) {
        IndexVal = Sext->getOperand(0);
-      if (SDB->findValue(IndexVal))
-        Index = SDB->getValue(IndexVal);
+      Index = SDB->getValue(IndexVal);
      }
-    return true;
    }
-  return false;
+  if (!Index.getValueType().isVector()) {
+    unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+    EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+    SmallVector<SDValue, 16> Ops(GEPWidth, Index);
+    Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+  }
+  return true;
  }
  
  void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
@@ -3190,7 +3326,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
  
    SDValue InChain = DAG.getRoot();
    if (AA->pointsToConstantMemory(MemoryLocation(
-          PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+          PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+          AAInfo))) {
      // Do not serialize (non-volatile) loads of constant memory with anything.
      InChain = DAG.getEntryNode();
    }
@@ -3233,8 +3370,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
    bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
    bool ConstantMemory = false;
    if (UniformBase &&
-      AA->pointsToConstantMemory(
-          MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) {
+      AA->pointsToConstantMemory(MemoryLocation(
+          BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+          AAInfo))) {
      // Do not serialize (non-volatile) loads of constant memory with anything.
      Root = DAG.getEntryNode();
      ConstantMemory = true;
@@ -3511,6 +3649,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
  
  static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
                                         SelectionDAG &DAG) {
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
    //   IntegerPartOfX = ((int32_t)(t0);
    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
  
@@ -3609,6 +3749,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
      //
      //   #define LOG2OFe 1.4426950f
      //   t0 = Op * LOG2OFe
+
+    // TODO: What fast-math-flags should be set here?
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                               getF32Constant(DAG, 0x3fb8aa3b, dl));
      return getLimitedPrecisionExp2(t0, dl, DAG);
@@ -3622,6 +3764,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
  /// limited-precision mode.
  static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
+ 
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
    if (Op.getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3718,6 +3863,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
  /// limited-precision mode.
  static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                            const TargetLowering &TLI) {
+  
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
    if (Op.getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3813,6 +3961,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
  /// limited-precision mode.
  static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                             const TargetLowering &TLI) {
+
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
    if (Op.getValueType() == MVT::f32 &&
        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
      SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@@ -3922,6 +4073,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
      }
    }
  
+  // TODO: What fast-math-flags should be set on the FMUL node?
    if (IsExp10) {
      // Put the exponent in the right bit position for later addition to the
      // final result:
@@ -3955,9 +4107,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
        return DAG.getConstantFP(1.0, DL, LHS.getValueType());
  
      const Function *F = DAG.getMachineFunction().getFunction();
-    if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
-        // If optimizing for size, don't insert too many multiplies.  This
-        // inserts up to 5 multiplies.
+    if (!F->optForSize() ||
+        // If optimizing for size, don't insert too many multiplies.
+        // This inserts up to 5 multiplies.
          countPopulation(Val) + Log2_32(Val) < 7) {
        // We use the simple binary decomposition method to generate the multiply
        // sequence.  There are more optimal ways to do this (for example,
@@ -3965,6 +4117,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
        // the benefit of being both really simple and much better than a libcall.
        SDValue Res;  // Logically starts equal to 1.0
        SDValue CurSquare = LHS;
+      // TODO: Intrinsics should have fast-math-flags that propagate to these
+      // nodes.
        while (Val) {
          if (Val & 1) {
            if (Res.getNode())
@@ -4238,8 +4392,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
        if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
          Address = BCI->getOperand(0);
        // Parameters are handled specially.
-      bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable ||
-                         isa<Argument>(Address);
+      bool isParameter = Variable->isParameter() || isa<Argument>(Address);
  
        const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
  
@@ -4256,15 +4409,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                     N);
            return nullptr;
          }
-      } else if (AI)
+      } else {
          SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
                                true, 0, dl, SDNodeOrder);
-      else {
-        // Can't do anything with other non-AI cases yet.
-        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-        DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
-        DEBUG(Address->dump());
-        return nullptr;
        }
        DAG.AddDbgValue(SDV, N.getNode(), isParameter);
      } else {
@@ -4421,6 +4568,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                              getRoot(), getValue(I.getArgOperand(0))));
      return nullptr;
    }
+  case Intrinsic::eh_sjlj_setup_dispatch: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+                            getRoot()));
+    return nullptr;
+  }
  
    case Intrinsic::masked_gather:
      visitMaskedGather(I);
@@ -4614,6 +4766,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                 getValue(I.getArgOperand(1)),
                                 getValue(I.getArgOperand(2))));
      } else {
+      // TODO: Intrinsic calls should have fast-math-flags.
        SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
                                  getValue(I.getArgOperand(0)).getValueType(),
                                  getValue(I.getArgOperand(0)),
@@ -4657,6 +4810,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(0)).getValueType(),
                               getValue(I.getArgOperand(0))));
      return nullptr;
+  case Intrinsic::uabsdiff:
+    setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1))));
+    return nullptr;
+  case Intrinsic::sabsdiff:
+    setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1))));
+    return nullptr;
    case Intrinsic::cttz: {
      SDValue Arg = getValue(I.getArgOperand(0));
      ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
@@ -4743,8 +4908,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
  
      // Store the stack protector onto the stack.
-    Res = DAG.getStore(Chain, sdl, Src, FIN,
-                       MachinePointerInfo::getFixedStack(FI),
+    Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+                                                 DAG.getMachineFunction(), FI),
                         true, false, 0);
      setValue(&I, Res);
      DAG.setRoot(Res);
@@ -5040,7 +5205,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      assert(Reg && "cannot get exception code on this platform");
      MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
      const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
-    assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad");
+    assert(FuncInfo.MBB->isEHPad() && "eh.exceptioncode in non-lpad");
      unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC);
      SDValue N =
          DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
@@ -5053,11 +5218,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  
  std::pair<SDValue, SDValue>
  SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
-                                    MachineBasicBlock *LandingPad) {
+                                    const BasicBlock *EHPadBB) {
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    MCSymbol *BeginLabel = nullptr;
  
-  if (LandingPad) {
+  if (EHPadBB) {
      // Insert a label before the invoke call to mark the try range.  This can be
      // used to detect deletion of the invoke via the MachineModuleInfo.
      BeginLabel = MMI.getContext().createTempSymbol();
@@ -5067,7 +5232,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
      unsigned CallSiteIndex = MMI.getCurrentCallSite();
      if (CallSiteIndex) {
        MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
-      LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+      LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
  
        // Now that the call site is handled, stop tracking it.
        MMI.setCurrentCallSite(0);
@@ -5100,14 +5265,20 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
      DAG.setRoot(Result.second);
    }
  
-  if (LandingPad) {
+  if (EHPadBB) {
      // Insert a label at the end of the invoke call to mark the try range.  This
      // can be used to detect deletion of the invoke via the MachineModuleInfo.
      MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
      DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
  
      // Inform MachineModuleInfo of range.
-    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+    if (MMI.hasEHFunclets()) {
+      WinEHFuncInfo &EHInfo =
+          MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction());
+      EHInfo.addIPToStateRange(EHPadBB, BeginLabel, EndLabel);
+    } else {
+      MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+    }
    }
  
    return Result;
@@ -5115,7 +5286,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
  
  void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                        bool isTailCall,
-                                      MachineBasicBlock *LandingPad) {
+                                      const BasicBlock *EHPadBB) {
    PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
    FunctionType *FTy = cast<FunctionType>(PT->getElementType());
    Type *RetTy = FTy->getReturnType();
@@ -5154,7 +5325,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
    CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
      .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
      .setTailCall(isTailCall);
-  std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+  std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
  
    if (Result.first.getNode())
      setValue(CS.getInstruction(), Result.first);
@@ -5978,7 +6149,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
  
        if (OpInfo.ConstraintVT != Input.ConstraintVT) {
-       const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+        const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
          std::pair<unsigned, const TargetRegisterClass *> MatchRC =
              TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                               OpInfo.ConstraintVT);
@@ -6037,10 +6208,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
          int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
          SDValue StackSlot =
              DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
-        Chain = DAG.getStore(Chain, getCurSDLoc(),
-                             OpInfo.CallOperand, StackSlot,
-                             MachinePointerInfo::getFixedStack(SSFI),
-                             false, false, 0);
+        Chain = DAG.getStore(
+            Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
+            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+            false, false, 0);
          OpInfo.CallOperand = StackSlot;
        }
  
@@ -6460,12 +6631,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
  /// This is a helper for lowering intrinsics that follow a target calling
  /// convention or require stack pointer adjustment. Only a subset of the
  /// intrinsic's operands need to participate in the calling convention.
-std::pair<SDValue, SDValue>
-SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
-                                       unsigned NumArgs, SDValue Callee,
-                                       Type *ReturnTy,
-                                       MachineBasicBlock *LandingPad,
-                                       bool IsPatchPoint) {
+std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
+    ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
+    Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
    TargetLowering::ArgListTy Args;
    Args.reserve(NumArgs);
  
@@ -6489,7 +6657,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
      .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
      .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
  
-  return lowerInvokable(CLI, LandingPad);
+  return lowerInvokable(CLI, EHPadBB);
  }
  
  /// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6593,7 +6761,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
  
  /// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
  void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
-                                          MachineBasicBlock *LandingPad) {
+                                          const BasicBlock *EHPadBB) {
    // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
    //                                                 i32 <numBytes>,
    //                                                 i8* <target>,
@@ -6630,9 +6798,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
    unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
    Type *ReturnTy =
      IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
-  std::pair<SDValue, SDValue> Result =
-    lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
-                      LandingPad, true);
+  std::pair<SDValue, SDValue> Result = lowerCallOperands(
+      CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
  
    SDNode *CallEnd = Result.second.getNode();
    if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -6986,8 +7153,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                                                          PtrVT));
        SDValue L = CLI.DAG.getLoad(
            RetTys[i], CLI.DL, CLI.Chain, Add,
-          MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
-          false, false, 1);
+          MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+                                            DemoteStackIdx, Offsets[i]),
+          false, false, false, 1);
        ReturnValues[i] = L;
        Chains[i] = L.getValue(1);
      }
@@ -7707,12 +7875,22 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
                             .getSizeInBits();
    assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
  
-  if (Low.isNonNegative() && High.slt(BitWidth)) {
-    // Optimize the case where all the case values fit in a
-    // word without having to subtract minValue. In this case,
-    // we can optimize away the subtraction.
+  // Check if the clusters cover a contiguous range such that no value in the
+  // range will jump to the default statement.
+  bool ContiguousRange = true;
+  for (int64_t I = First + 1; I <= Last; ++I) {
+    if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+      ContiguousRange = false;
+      break;
+    }
+  }
+
+  if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+    // Optimize the case where all the case values fit in a word without having
+    // to subtract minValue. In this case, we can optimize away the subtraction.
      LowBound = APInt::getNullValue(Low.getBitWidth());
      CmpRange = High;
+    ContiguousRange = false;
    } else {
      LowBound = Low;
      CmpRange = High - Low;
@@ -7755,8 +7933,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
      BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
    }
    BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
-                            SI->getCondition(), -1U, MVT::Other, false, nullptr,
-                            nullptr, std::move(BTI));
+                            SI->getCondition(), -1U, MVT::Other, false,
+                            ContiguousRange, nullptr, nullptr, std::move(BTI),
+                            TotalWeight);
  
    BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
                                      BitTestCases.size() - 1, TotalWeight);
@@ -7949,7 +8128,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
    }
  
    // Compute total weight.
-  uint32_t UnhandledWeights = 0;
+  uint32_t DefaultWeight = W.DefaultWeight;
+  uint32_t UnhandledWeights = DefaultWeight;
    for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
      UnhandledWeights += I->Weight;
      assert(UnhandledWeights >= I->Weight && "Weight overflow!");
@@ -7967,6 +8147,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
        // Put Cond in a virtual register to make it available from the new blocks.
        ExportFromCurrentBlock(Cond);
      }
+    UnhandledWeights -= I->Weight;
  
      switch (I->Kind) {
        case CC_JumpTable: {
@@ -7977,8 +8158,26 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
          // The jump block hasn't been inserted yet; insert it here.
          MachineBasicBlock *JumpMBB = JT->MBB;
          CurMF->insert(BBI, JumpMBB);
-        addSuccessorWithWeight(CurMBB, Fallthrough);
-        addSuccessorWithWeight(CurMBB, JumpMBB);
+
+        uint32_t JumpWeight = I->Weight;
+        uint32_t FallthroughWeight = UnhandledWeights;
+
+        // If the default statement is a target of the jump table, we evenly
+        // distribute the default weight to successors of CurMBB. Also update
+        // the weight on the edge from JumpMBB to Fallthrough.
+        for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+                                              SE = JumpMBB->succ_end();
+             SI != SE; ++SI) {
+          if (*SI == DefaultMBB) {
+            JumpWeight += DefaultWeight / 2;
+            FallthroughWeight -= DefaultWeight / 2;
+            JumpMBB->setSuccWeight(SI, DefaultWeight / 2);
+            break;
+          }
+        }
+
+        addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight);
+        addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight);
  
          // The jump table header will be inserted in our current block, do the
          // range check, and fall through to our fallthrough block.
@@ -8004,8 +8203,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
          BTB->Parent = CurMBB;
          BTB->Default = Fallthrough;
  
-        // If we're in the right place, emit the bit test header header right now.
-        if (CurMBB ==SwitchMBB) {
+        BTB->DefaultWeight = UnhandledWeights;
+        // If the cases in bit test don't form a contiguous range, we evenly
+        // distribute the weight on the edge to Fallthrough to two successors
+        // of CurMBB.
+        if (!BTB->ContiguousRange) {
+          BTB->Weight += DefaultWeight / 2;
+          BTB->DefaultWeight -= DefaultWeight / 2;
+        }
+
+        // If we're in the right place, emit the bit test header right now.
+        if (CurMBB == SwitchMBB) {
            visitBitTestHeader(*BTB, SwitchMBB);
            BTB->Emitted = true;
          }
@@ -8029,7 +8237,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
          }
  
          // The false weight is the sum of all unhandled cases.
-        UnhandledWeights -= I->Weight;
          CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
                       UnhandledWeights);
  
@@ -8071,8 +8278,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
    // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
    CaseClusterIt LastLeft = W.FirstCluster;
    CaseClusterIt FirstRight = W.LastCluster;
-  uint32_t LeftWeight = LastLeft->Weight;
-  uint32_t RightWeight = FirstRight->Weight;
+  uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2;
+  uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2;
  
    // Move LastLeft and FirstRight towards each other from opposite directions to
    // find a partitioning of the clusters which balances the weight on both
@@ -8158,7 +8365,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
    } else {
      LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
      FuncInfo.MF->insert(BBI, LeftMBB);
-    WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
+    WorkList.push_back(
+        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2});
      // Put Cond in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(Cond);
    }
@@ -8173,7 +8381,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
    } else {
      RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
      FuncInfo.MF->insert(BBI, RightMBB);
-    WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
+    WorkList.push_back(
+        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2});
      // Put Cond in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(Cond);
    }
@@ -8274,7 +8483,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
    SwitchWorkList WorkList;
    CaseClusterIt First = Clusters.begin();
    CaseClusterIt Last = Clusters.end() - 1;
-  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
+  uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB);
+  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight});
  
    while (!WorkList.empty()) {
      SwitchWorkListItem W = WorkList.back();