X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FSystemZ%2FSystemZISelLowering.cpp;h=b1036a2e4739faf0432211231e19908473f2d63e;hb=1f7a90d7936a9a6278365ea8b0521c7ff17432c3;hp=dcb122cc84dedd0817f6f99c69ba53a0b650fa92;hpb=2dd264c8a3337523cccd2e7192721821678367b7;p=oota-llvm.git

diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index dcb122cc84d..b1036a2e473 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
 #include <cctype>
 
 using namespace llvm;
@@ -80,9 +81,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
   return Op;
 }
 
-SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
-    : TargetLowering(tm, new TargetLoweringObjectFileELF()),
-      Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
+                                             const SystemZSubtarget &STI)
+    : TargetLowering(tm), Subtarget(STI) {
   MVT PtrVT = getPointerTy();
 
   // Set up the register classes.
@@ -96,7 +97,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
   addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
 
   // Compute derived properties from the register classes
-  computeRegisterProperties();
+  computeRegisterProperties(Subtarget.getRegisterInfo());
 
   // Set up special registers.
   setExceptionPointerRegister(SystemZ::R6D);
@@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
       // available, or if the operand is constant.
       setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
 
+      // Use POPCNT on z196 and above.
+      if (Subtarget.hasPopulationCount())
+        setOperationAction(ISD::CTPOP, VT, Custom);
+      else
+        setOperationAction(ISD::CTPOP, VT, Expand);
+
       // No special instructions for these.
-      setOperationAction(ISD::CTPOP, VT, Expand);
       setOperationAction(ISD::CTTZ, VT, Expand);
       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -218,10 +224,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
   setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
 
   // We have native instructions for i8, i16 and i32 extensions, but not i1.
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  for (MVT VT : MVT::integer_valuetypes()) {
+    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+  }
 
   // Handle the various types of symbolic address.
   setOperationAction(ISD::ConstantPool, PtrVT, Custom);
@@ -275,7 +283,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
   // Needed so that we don't try to implement f128 constant loads using
   // a load-and-extend of a f80 constant (in cases where the constant
   // would fit in an f80).
-  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
+  for (MVT VT : MVT::fp_valuetypes())
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
 
   // Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -296,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::SIGN_EXTEND); + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; @@ -339,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, @@ -496,8 +518,10 @@ parseRegisterNumber(const std::string &Constraint, return std::make_pair(0U, nullptr); } -std::pair SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { +std::pair +SystemZTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -554,7 +578,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { SystemZMC::FP64Regs); } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } void SystemZTargetLowering:: @@ -567,35 +591,35 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<8>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<12>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff if (auto *C = dyn_cast(Op)) if (C->getZExtValue() == 0x7fffffff) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; } @@ -673,14 +697,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = - MF.getInfo(); - auto *TFL = static_cast( - DAG.getTarget().getFrameLowering()); + MF.getInfo(); + auto *TFL = + static_cast(Subtarget.getFrameLowering()); // Assign locations to all of the incoming 
arguments. SmallVector ArgLocs; - CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs, - *DAG.getContext()); + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); unsigned NumFixedGPRs = 0; @@ -730,7 +753,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, EVT PtrVT = getPointerTy(); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -783,7 +807,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, return Chain; } -static bool canUseSiblingCall(CCState ArgCCInfo, +static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl &ArgLocs) { // Punt if there are any indirect or stack arguments, or if the call // needs the call-saved argument register R6. @@ -818,8 +842,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs, - *DAG.getContext()); + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -832,7 +855,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the start of the call. if (!IsTailCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), DL); // Copy argument values to their designated locations. @@ -868,7 +892,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); // Emit the store. MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, @@ -916,8 +940,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, RegsToPass[I].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -934,15 +958,14 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, PtrVT, true), - DAG.getConstant(0, PtrVT, true), + DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. 
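// Standalone sketch (assumption: illustrative only, not part of this patch) of
// why the argument lowering above adds 4 to the address of an i32/f32 kept in
// an 8-byte stack slot: SystemZ is big-endian, so the 32-bit payload occupies
// the higher-addressed half of the doubleword.
#include <cassert>
#include <cstdint>

int main() {
  // Big-endian image of a 64-bit slot holding the 32-bit value 0x12345678.
  const unsigned char Slot[8] = {0, 0, 0, 0, 0x12, 0x34, 0x56, 0x78};
  const unsigned char *ValueAddr = Slot + 4;   // slot address plus 4
  uint32_t Value = (uint32_t(ValueAddr[0]) << 24) | (uint32_t(ValueAddr[1]) << 16) |
                   (uint32_t(ValueAddr[2]) << 8) | uint32_t(ValueAddr[3]);
  assert(Value == 0x12345678u);
  return 0;
}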
SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs, - *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. @@ -973,8 +996,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Assign locations to each returned value. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs, - *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); // Quick exit for void returns @@ -1015,6 +1037,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1112,7 +1181,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { // If C can be converted to a comparison against zero, adjust the operands // as necessary. -static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; @@ -1126,13 +1195,13 @@ static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { C.CCMask ^= SystemZ::CCMASK_CMP_EQ; - C.Op1 = DAG.getConstant(0, C.Op1.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); } } // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. 
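// Small standalone check (assumption: not part of this patch) of the rewrite
// performed by adjustZeroCmp above: for signed operands, "x < 1" is "x <= 0"
// and "x >= 1" is "x > 0", so the constant can be replaced by zero and the EQ
// bit toggled in the condition-code mask.
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X = -3; X <= 3; ++X) {
    assert((X < 1) == (X <= 0));
    assert((X >= 1) == (X > 0));
  }
  return 0;
}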
-static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. if (!C.Op0.hasOneUse() || @@ -1192,12 +1261,12 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->isVolatile(), Load->isNonTemporal(), - Load->getAlignment()); + Load->isInvariant(), Load->getAlignment()); // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) - C.Op1 = DAG.getConstant(Value, MVT::i32); + C.Op1 = DAG.getConstant(Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable @@ -1293,7 +1362,7 @@ static unsigned reverseCCMask(unsigned CCMask) { // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. -static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { +static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -1302,7 +1371,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { C.Op0 = SDValue(N, 0); - C.Op1 = DAG.getConstant(0, N->getValueType(0)); + C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); return; } } @@ -1358,7 +1427,7 @@ static void adjustForLTGFR(Comparison &C) { // If C compares the truncation of an extending load, try to compare // the untruncated value instead. This exposes more opportunities to // reuse CC. -static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { +static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.Op0.getOpcode() == ISD::TRUNCATE && C.Op0.getOperand(0).getOpcode() == ISD::LOAD && C.Op1.getOpcode() == ISD::Constant && @@ -1370,7 +1439,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { C.Op0 = C.Op0.getOperand(0); - C.Op1 = DAG.getConstant(0, C.Op0.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); } } } @@ -1489,7 +1558,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // See whether C can be implemented as a TEST UNDER MASK instruction. // Update the arguments with the TM version if so. -static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { +static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // Check that we have a comparison with a constant. auto *ConstOp1 = dyn_cast(C.Op1); if (!ConstOp1) @@ -1529,6 +1598,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } + if (!MaskVal) + return; // Check whether the combination of mask, comparison value and comparison // type are suitable. 
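// Standalone check (assumption: illustrative only, not part of this patch) of
// the mask computed by adjustForTestUnderMask above: -(CmpVal & -CmpVal) keeps
// CmpVal's lowest set bit and everything above it.  Because the bits below
// that point are zero in CmpVal, clearing them in the other operand cannot
// change an unsigned ordered comparison, which is why ICmpType is forced to
// UnsignedOnly.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t CmpVal = 0x30;                    // lowest set bit is 0x10
  uint64_t MaskVal = -(CmpVal & -CmpVal);
  assert(MaskVal == ~uint64_t(0xf));         // low four bits masked off
  for (uint64_t Op0 = 0; Op0 < 256; ++Op0)
    assert((Op0 >= CmpVal) == ((Op0 & MaskVal) >= CmpVal));
  return 0;
}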
@@ -1565,14 +1636,58 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { if (Mask && Mask->getZExtValue() == MaskVal) C.Op1 = SDValue(Mask, 0); else - C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType()); + C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); C.CCValid = SystemZ::CCMASK_TM; C.CCMask = NewCCMask; } +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. +static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond) { + ISD::CondCode Cond, SDLoc DL) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1596,11 +1711,11 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; - adjustZeroCmp(DAG, C); - adjustSubwordCmp(DAG, C); - adjustForSubtraction(DAG, C); + adjustZeroCmp(DAG, DL, C); + adjustSubwordCmp(DAG, DL, C); + adjustForSubtraction(DAG, DL, C); adjustForLTGFR(C); - adjustICmpTruncate(DAG, C); + adjustICmpTruncate(DAG, DL, C); } if (shouldSwapCmpOperands(C)) { @@ -1608,20 +1723,31 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, C.CCMask = reverseCCMask(C.CCMask); } - adjustForTestUnderMask(DAG, C); + adjustForTestUnderMask(DAG, DL, C); return C; } // Emit the comparison instruction described by C. 
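// Standalone restatement (assumption: not part of this patch) of the mask
// encoding used by getIntrinsicCmp above: condition code n (0..3) corresponds
// to CC-mask bit 1 << (3 - n), so an equality test against CC == n selects
// exactly that bit, and ordered tests select a contiguous run of bits.
#include <cassert>
#include <cstdint>

static unsigned ccMaskEQ(uint64_t CC) {
  return CC < 4 ? 1u << (3 - CC) : 0;        // always false for CC > 3
}

static unsigned ccMaskULT(uint64_t CC) {     // CC result strictly below CC
  return CC < 4 ? (~0u << (4 - CC)) & 0xfu : 0xfu;
}

int main() {
  assert(ccMaskEQ(0) == 8 && ccMaskEQ(3) == 1);
  assert(ccMaskULT(0) == 0);                 // nothing is below CC 0
  assert(ccMaskULT(2) == (8 | 4));           // CC values 0 and 1
  return 0;
}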
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, MVT::i32)); + DAG.getConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(RegisterOnly, MVT::i32)); + DAG.getConstant(RegisterOnly, DL, MVT::i32)); } return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); } @@ -1635,7 +1761,8 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); - Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, DL, MVT::i64)); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } @@ -1667,18 +1794,18 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, if (Conversion.XORValue) Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, MVT::i32)); + DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); if (Conversion.AddValue) Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, MVT::i32)); + DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); // The SHR/AND sequence should get optimized to an RISBG. 
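// Minimal scalar analogue (assumption: illustrative only, not DAG code) of the
// unsigned flavour of lowerMUL_LOHI32 above: extend both 32-bit operands to 64
// bits, multiply once, then split the product with a shift and two truncates.
#include <cassert>
#include <cstdint>

static void mulLoHi32(uint32_t A, uint32_t B, uint32_t &Hi, uint32_t &Lo) {
  uint64_t Mul = uint64_t(A) * uint64_t(B);  // single 64-bit multiply
  Hi = uint32_t(Mul >> 32);                  // SRL by 32, then truncate
  Lo = uint32_t(Mul);                        // plain truncate
}

int main() {
  uint32_t Hi, Lo;
  mulLoHi32(0xffffffffu, 2u, Hi, Lo);
  assert(Hi == 1 && Lo == 0xfffffffeu);
  return 0;
}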
Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, MVT::i32)); + DAG.getConstant(Conversion.Bit, DL, MVT::i32)); if (Conversion.Bit != 31) Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); return Result; } @@ -1689,24 +1816,23 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), - DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); + Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -1727,7 +1853,7 @@ static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); if (IsNegative) Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), - DAG.getConstant(0, Op.getValueType()), Op); + DAG.getConstant(0, DL, Op.getValueType()), Op); return Op; } @@ -1740,7 +1866,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). @@ -1775,18 +1901,14 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, if (!is32Bit(VT)) Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); // Sign-extend from the low bit. - SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32); + SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); } } - SmallVector Ops; - Ops.push_back(TrueOp); - Ops.push_back(FalseOp); - Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32)); - Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32)); - Ops.push_back(Glue); + SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); @@ -1826,11 +1948,58 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, // addition for it. 
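// Standalone illustration (assumption: not part of this patch) of the
// "sign-extend from the low bit" trick in lowerSELECT_CC above: a 0/1 value
// shifted left by width-1 and arithmetic-shifted back becomes 0 or -1, which
// materializes a -1/0 select result directly from the IPM-derived bit.
#include <cassert>
#include <cstdint>

static int64_t signExtendFromLowBit(uint64_t Bit) {
  // SHL then SRA by width-1; assumes arithmetic right shift, as SRA performs.
  return int64_t(Bit << 63) >> 63;
}

int main() {
  assert(signExtendFromLowBit(0) == 0);
  assert(signExtendFromLowBit(1) == -1);
  return 0;
}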
if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, - DAG.getConstant(Offset, PtrVT)); + DAG.getConstant(Offset, DL, PtrVT)); return Result; } +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. + Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. + return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); @@ -1838,33 +2007,94 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, EVT PtrVT = getPointerTy(); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); - // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); // The low part of the thread pointer is in access register 1. SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); // Merge them into a single 64-bit address. SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, - DAG.getConstant(32, PtrVT)); + DAG.getConstant(32, DL, PtrVT)); SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); - // Get the offset of GA from the thread pointer. - SystemZConstantPoolValue *CPV = - SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). 
+ SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); - // Force the offset into the constant pool and load it from there. - SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); - SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - CPAddr, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. + SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + DTPOffset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. + Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getGOT(), + false, false, false, 0); + break; + } + + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + break; + } + } // Add the base and offset together. 
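// Scalar sketch (assumption: illustrative only, not DAG code) of the thread
// pointer assembled above from access registers 0 and 1; the addition below
// then combines it with the model-specific offset.
#include <cassert>
#include <cstdint>

static uint64_t threadPointer(uint32_t TPHi, uint32_t TPLo) {
  return (uint64_t(TPHi) << 32) | TPLo;  // SHL by 32, then OR
}

int main() {
  assert(threadPointer(0x12345678u, 0x9abcdef0u) == 0x123456789abcdef0ull);
  return 0;
}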
return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); @@ -1926,7 +2156,7 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, } else { In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); } SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, @@ -1941,7 +2171,7 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::i32, Out64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); @@ -1962,8 +2192,8 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, // The initial values of each field. const unsigned NumFields = 4; SDValue Fields[NumFields] = { - DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT), - DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) }; @@ -1975,7 +2205,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SDValue FieldAddr = Addr; if (Offset != 0) FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, MachinePointerInfo(SV, Offset), false, false, 0); @@ -1993,8 +2223,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2049,7 +2280,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, // multiplication: // // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) - SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue C63 = DAG.getConstant(63, DL, MVT::i64); SDValue LL = Op.getOperand(0); SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); @@ -2187,6 +2418,47 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int64_t OrigBitSize = VT.getSizeInBits(); + SDLoc DL(Op); + + // Get the known-zero mask for the operand. + Op = Op.getOperand(0); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, DL, VT); + + // Skip known-zero high parts of the operand. + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. 
All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(BitSize - 8, DL, VT)); + + return Op; +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2233,23 +2505,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) if (auto *Const = dyn_cast(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; - Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType()); + Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); } // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Extend the source operand to 32 bits and prepare it for the inner loop. // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other @@ -2258,23 +2530,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, // bits must be set, while for other opcodes they should be left clear. if (Opcode != SystemZISD::ATOMIC_SWAPW) Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, - DAG.getConstant(32 - BitSize, WideVT)); + DAG.getConstant(32 - BitSize, DL, WideVT)); if (Opcode == SystemZISD::ATOMIC_LOADW_AND || Opcode == SystemZISD::ATOMIC_LOADW_NAND) Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, - DAG.getConstant(uint32_t(-1) >> BitSize, WideVT)); + DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); // Construct the ATOMIC_LOADW_* node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, - DAG.getConstant(BitSize, WideVT) }; + DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, NarrowVT, MMO); // Rotate the result of the final CS so that the field is in the lower // bits of a GR32, then truncate it. SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, - DAG.getConstant(BitSize, WideVT)); + DAG.getConstant(BitSize, DL, WideVT)); SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; @@ -2300,10 +2572,10 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, // available or the negative value is in the range of A(G)FHI. 
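// Standalone model (assumption: not part of this patch) of the byte-count
// summation in lowerCTPOP above.  popcntPerByte stands in for the POPCNT
// instruction, which leaves each byte's bit count in that byte; the shifted
// adds then accumulate all counts into the top byte.
#include <bitset>
#include <cassert>
#include <cstdint>

static uint64_t popcntPerByte(uint64_t V) {
  uint64_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= uint64_t(std::bitset<8>((V >> (8 * I)) & 0xff).count()) << (8 * I);
  return R;
}

static unsigned ctpop64(uint64_t V) {
  uint64_t Op = popcntPerByte(V);
  for (int I = 32; I >= 8; I /= 2)   // binary tree of shifted adds
    Op += Op << I;
  return unsigned(Op >> 56);         // overall result ends up in the high byte
}

int main() {
  assert(ctpop64(0) == 0);
  assert(ctpop64(~0ull) == 64);
  assert(ctpop64(0x8000000000000001ull) == 2);
  return 0;
}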
int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) - NegSrc2 = DAG.getConstant(Value, MemVT); + NegSrc2 = DAG.getConstant(Value, DL, MemVT); } else if (Subtarget.hasInterlockedAccess1()) // Use LAA(G) if available. - NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT), + NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); if (NegSrc2.getNode()) @@ -2342,23 +2614,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, - NegBitShift, DAG.getConstant(BitSize, WideVT) }; + NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); return AtomicOp; @@ -2387,19 +2659,45 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, // Just preserve the chain. return Op.getOperand(0); + SDLoc DL(Op); bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast(Op.getNode()); SDValue Ops[] = { Op.getOperand(0), - DAG.getConstant(Code, MVT::i32), + DAG.getConstant(Code, DL, MVT::i32), Op.getOperand(1) }; - return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. 
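// Standalone model (assumption: not part of this patch) of getCCResult below:
// IPM leaves the condition code in bits 28-29 of the 32-bit result and zeroes
// the two bits above it, so a logical shift right by SystemZ::IPM_CC (28)
// yields CC directly, with no masking needed.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned IPM_CC = 28;
  for (uint32_t CC = 0; CC < 4; ++CC) {
    uint32_t ProgramMask = 0x5;                       // arbitrary low nibble
    uint32_t IPMResult = (CC << IPM_CC) | (ProgramMask << 24);
    assert((IPMResult >> IPM_CC) == CC);              // SRL recovers CC
  }
  return 0;
}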
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDLoc DL(After); + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2437,6 +2735,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2471,6 +2771,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -2511,6 +2813,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -2552,9 +2857,10 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, Inner.getOperand(0)); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, - DAG.getConstant(NewShlAmt, ShiftVT)); + DAG.getConstant(NewShlAmt, SDLoc(Inner), + ShiftVT)); return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, - DAG.getConstant(NewSraAmt, ShiftVT)); + DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); } } } @@ -2614,8 +2920,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, MachineBasicBlock *MBB) const { - const SystemZInstrInfo *TII = static_cast( - MBB->getParent()->getTarget().getInstrInfo()); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); unsigned DestReg = MI->getOperand(0).getReg(); unsigned TrueReg = MI->getOperand(1).getReg(); @@ -2663,8 +2969,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, MachineBasicBlock *MBB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { - const SystemZInstrInfo *TII = static_cast( - MBB->getParent()->getTarget().getInstrInfo()); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); unsigned SrcReg = MI->getOperand(0).getReg(); MachineOperand Base = MI->getOperand(1); @@ -2733,7 +3039,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2803,14 +3109,10 @@ 
SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, unsigned Tmp = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp) .addReg(RotatedOldVal).addOperand(Src2); - if (BitSize < 32) + if (BitSize <= 32) // XILF with the upper BitSize bits set. BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) - .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize))); - else if (BitSize == 32) - // XILF with every bit set. - BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) - .addReg(Tmp).addImm(~uint32_t(0)); + .addReg(Tmp).addImm(-1U << (32 - BitSize)); else { // Use LCGR and add -1 to the result, which is more compact than // an XILF, XILH pair. @@ -2857,7 +3159,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, unsigned BitSize) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2969,7 +3271,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. @@ -3086,7 +3388,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, bool ClearEven, unsigned SubReg) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3118,7 +3420,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3288,7 +3590,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3342,6 +3644,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, return DoneMBB; } +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. 
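// Quick standalone check (assumption: not part of this patch) of the unified
// XILF immediate in emitAtomicLoadBinary above: -1U << (32 - BitSize) has the
// top BitSize bits set, and for BitSize == 32 the shift count is zero, so the
// old special case that XOR'ed with every bit set is covered by the same
// expression.
#include <cassert>
#include <cstdint>

static uint32_t xilfImmediate(unsigned BitSize) {
  return -1U << (32 - BitSize);      // valid for BitSize in [1, 32]
}

int main() {
  assert(xilfImmediate(8) == 0xff000000u);
  assert(xilfImmediate(16) == 0xffff0000u);
  assert(xilfImmediate(32) == 0xffffffffu);
  return 0;
}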
+ for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR clobbers. + if (!NoFloat && (Control & 4) != 0) { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -3583,6 +3929,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); default: llvm_unreachable("Unexpected instr type to insert"); }
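// Standalone sketch (assumption: illustrative only, not part of this patch) of
// the save-mask bookkeeping in emitTransactionBegin above: the bit for the
// r14/r15 pair is always forced on so the stack pointer survives an abort, the
// r10/r11 bit is forced on when a frame pointer is in use, and every GPR pair
// whose bit stays clear is the one that receives explicit clobber operands.
#include <cassert>
#include <cstdint>

static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100};

int main() {
  uint64_t Control = 0;              // caller asked for no GPRs to be saved
  bool HasFP = true;                 // assume a frame pointer is in use
  Control |= GPRControlBit[15];      // always preserve the stack pointer pair
  if (HasFP)
    Control |= GPRControlBit[11];    // and the frame pointer pair
  assert(Control == (0x0100 | 0x0400));

  unsigned NumClobberedPairs = 0;
  for (int I = 0; I < 16; I += 2)    // one control bit per even/odd pair
    if ((Control & GPRControlBit[I]) == 0)
      ++NumClobberedPairs;
  assert(NumClobberedPairs == 6);    // r0-r9 and r12-r13 are clobbered
  return 0;
}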