X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FSystemZ%2FSystemZISelLowering.cpp;h=b1036a2e4739faf0432211231e19908473f2d63e;hb=1f7a90d7936a9a6278365ea8b0521c7ff17432c3;hp=dcb122cc84dedd0817f6f99c69ba53a0b650fa92;hpb=2dd264c8a3337523cccd2e7192721821678367b7;p=oota-llvm.git

diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index dcb122cc84d..b1036a2e473 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
 #include <cctype>
 
 using namespace llvm;
@@ -80,9 +81,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
   return Op;
 }
 
-SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
-    : TargetLowering(tm, new TargetLoweringObjectFileELF()),
-      Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
+                                             const SystemZSubtarget &STI)
+    : TargetLowering(tm), Subtarget(STI) {
   MVT PtrVT = getPointerTy();
 
   // Set up the register classes.
@@ -96,7 +97,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
   addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
 
   // Compute derived properties from the register classes
-  computeRegisterProperties();
+  computeRegisterProperties(Subtarget.getRegisterInfo());
 
   // Set up special registers.
   setExceptionPointerRegister(SystemZ::R6D);
@@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
       // available, or if the operand is constant.
       setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
 
+      // Use POPCNT on z196 and above.
+      if (Subtarget.hasPopulationCount())
+        setOperationAction(ISD::CTPOP, VT, Custom);
+      else
+        setOperationAction(ISD::CTPOP, VT, Expand);
+
       // No special instructions for these.
-      setOperationAction(ISD::CTPOP, VT, Expand);
       setOperationAction(ISD::CTTZ, VT, Expand);
       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -218,10 +224,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
   setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
 
   // We have native instructions for i8, i16 and i32 extensions, but not i1.
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  for (MVT VT : MVT::integer_valuetypes()) {
+    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+  }
 
   // Handle the various types of symbolic address.
   setOperationAction(ISD::ConstantPool, PtrVT, Custom);
@@ -275,7 +283,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
   // Needed so that we don't try to implement f128 constant loads using
   // a load-and-extend of a f80 constant (in cases where the constant
   // would fit in an f80).
-  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
+  for (MVT VT : MVT::fp_valuetypes())
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
 
   // Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -296,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::SIGN_EXTEND); + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; @@ -339,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, @@ -496,8 +518,10 @@ parseRegisterNumber(const std::string &Constraint, return std::make_pair(0U, nullptr); } -std::pair SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { +std::pair +SystemZTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -554,7 +578,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { SystemZMC::FP64Regs); } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } void SystemZTargetLowering:: @@ -567,35 +591,35 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<8>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<12>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff if (auto *C = dyn_cast(Op)) if (C->getZExtValue() == 0x7fffffff) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; } @@ -673,14 +697,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = - MF.getInfo(); - auto *TFL = static_cast( - DAG.getTarget().getFrameLowering()); + MF.getInfo(); + auto *TFL = + static_cast(Subtarget.getFrameLowering()); // Assign locations to all of the incoming 
arguments. SmallVector ArgLocs; - CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs, - *DAG.getContext()); + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); unsigned NumFixedGPRs = 0; @@ -730,7 +753,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, EVT PtrVT = getPointerTy(); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -783,7 +807,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, return Chain; } -static bool canUseSiblingCall(CCState ArgCCInfo, +static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl &ArgLocs) { // Punt if there are any indirect or stack arguments, or if the call // needs the call-saved argument register R6. @@ -818,8 +842,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs, - *DAG.getContext()); + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -832,7 +855,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the start of the call. if (!IsTailCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), DL); // Copy argument values to their designated locations. @@ -868,7 +892,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); // Emit the store. MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, @@ -916,8 +940,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, RegsToPass[I].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -934,15 +958,14 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, PtrVT, true), - DAG.getConstant(0, PtrVT, true), + DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. 
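// Standalone sketch (assumption: illustrative only, not part of this patch) of
// why the argument lowering above adds 4 to the address of an i32/f32 kept in
// an 8-byte stack slot: SystemZ is big-endian, so the 32-bit payload occupies
// the higher-addressed half of the doubleword.
#include <cassert>
#include <cstdint>

int main() {
  // Big-endian image of a 64-bit slot holding the 32-bit value 0x12345678.
  const unsigned char Slot[8] = {0, 0, 0, 0, 0x12, 0x34, 0x56, 0x78};
  const unsigned char *ValueAddr = Slot + 4;   // slot address plus 4
  uint32_t Value = (uint32_t(ValueAddr[0]) << 24) | (uint32_t(ValueAddr[1]) << 16) |
                   (uint32_t(ValueAddr[2]) << 8) | uint32_t(ValueAddr[3]);
  assert(Value == 0x12345678u);
  return 0;
}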
SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs, - *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. @@ -973,8 +996,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Assign locations to each returned value. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs, - *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); // Quick exit for void returns @@ -1015,6 +1037,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1112,7 +1181,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { // If C can be converted to a comparison against zero, adjust the operands // as necessary. -static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; @@ -1126,13 +1195,13 @@ static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { C.CCMask ^= SystemZ::CCMASK_CMP_EQ; - C.Op1 = DAG.getConstant(0, C.Op1.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); } } // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. 
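// Small standalone check (assumption: not part of this patch) of the rewrite
// performed by adjustZeroCmp above: for signed operands, "x < 1" is "x <= 0"
// and "x >= 1" is "x > 0", so the constant can be replaced by zero and the EQ
// bit toggled in the condition-code mask.
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X = -3; X <= 3; ++X) {
    assert((X < 1) == (X <= 0));
    assert((X >= 1) == (X > 0));
  }
  return 0;
}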
-static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. if (!C.Op0.hasOneUse() || @@ -1192,12 +1261,12 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->isVolatile(), Load->isNonTemporal(), - Load->getAlignment()); + Load->isInvariant(), Load->getAlignment()); // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) - C.Op1 = DAG.getConstant(Value, MVT::i32); + C.Op1 = DAG.getConstant(Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable @@ -1293,7 +1362,7 @@ static unsigned reverseCCMask(unsigned CCMask) { // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. -static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { +static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -1302,7 +1371,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { C.Op0 = SDValue(N, 0); - C.Op1 = DAG.getConstant(0, N->getValueType(0)); + C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); return; } } @@ -1358,7 +1427,7 @@ static void adjustForLTGFR(Comparison &C) { // If C compares the truncation of an extending load, try to compare // the untruncated value instead. This exposes more opportunities to // reuse CC. -static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { +static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.Op0.getOpcode() == ISD::TRUNCATE && C.Op0.getOperand(0).getOpcode() == ISD::LOAD && C.Op1.getOpcode() == ISD::Constant && @@ -1370,7 +1439,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { C.Op0 = C.Op0.getOperand(0); - C.Op1 = DAG.getConstant(0, C.Op0.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); } } } @@ -1489,7 +1558,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // See whether C can be implemented as a TEST UNDER MASK instruction. // Update the arguments with the TM version if so. -static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { +static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // Check that we have a comparison with a constant. auto *ConstOp1 = dyn_cast(C.Op1); if (!ConstOp1) @@ -1529,6 +1598,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } + if (!MaskVal) + return; // Check whether the combination of mask, comparison value and comparison // type are suitable. 
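// Standalone check (assumption: illustrative only, not part of this patch) of
// the mask computed by adjustForTestUnderMask above: -(CmpVal & -CmpVal) keeps
// CmpVal's lowest set bit and everything above it.  Because the bits below
// that point are zero in CmpVal, clearing them in the other operand cannot
// change an unsigned ordered comparison, which is why ICmpType is forced to
// UnsignedOnly.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t CmpVal = 0x30;                    // lowest set bit is 0x10
  uint64_t MaskVal = -(CmpVal & -CmpVal);
  assert(MaskVal == ~uint64_t(0xf));         // low four bits masked off
  for (uint64_t Op0 = 0; Op0 < 256; ++Op0)
    assert((Op0 >= CmpVal) == ((Op0 & MaskVal) >= CmpVal));
  return 0;
}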
@@ -1565,14 +1636,58 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { if (Mask && Mask->getZExtValue() == MaskVal) C.Op1 = SDValue(Mask, 0); else - C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType()); + C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); C.CCValid = SystemZ::CCMASK_TM; C.CCMask = NewCCMask; } +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. +static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond) { + ISD::CondCode Cond, SDLoc DL) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1596,11 +1711,11 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; - adjustZeroCmp(DAG, C); - adjustSubwordCmp(DAG, C); - adjustForSubtraction(DAG, C); + adjustZeroCmp(DAG, DL, C); + adjustSubwordCmp(DAG, DL, C); + adjustForSubtraction(DAG, DL, C); adjustForLTGFR(C); - adjustICmpTruncate(DAG, C); + adjustICmpTruncate(DAG, DL, C); } if (shouldSwapCmpOperands(C)) { @@ -1608,20 +1723,31 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, C.CCMask = reverseCCMask(C.CCMask); } - adjustForTestUnderMask(DAG, C); + adjustForTestUnderMask(DAG, DL, C); return C; } // Emit the comparison instruction described by C. 
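// Standalone restatement (assumption: not part of this patch) of the mask
// encoding used by getIntrinsicCmp above: condition code n (0..3) corresponds
// to CC-mask bit 1 << (3 - n), so an equality test against CC == n selects
// exactly that bit, and ordered tests select a contiguous run of bits.
#include <cassert>
#include <cstdint>

static unsigned ccMaskEQ(uint64_t CC) {
  return CC < 4 ? 1u << (3 - CC) : 0;        // always false for CC > 3
}

static unsigned ccMaskULT(uint64_t CC) {     // CC result strictly below CC
  return CC < 4 ? (~0u << (4 - CC)) & 0xfu : 0xfu;
}

int main() {
  assert(ccMaskEQ(0) == 8 && ccMaskEQ(3) == 1);
  assert(ccMaskULT(0) == 0);                 // nothing is below CC 0
  assert(ccMaskULT(2) == (8 | 4));           // CC values 0 and 1
  return 0;
}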
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, MVT::i32)); + DAG.getConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(RegisterOnly, MVT::i32)); + DAG.getConstant(RegisterOnly, DL, MVT::i32)); } return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); } @@ -1635,7 +1761,8 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); - Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, DL, MVT::i64)); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } @@ -1667,18 +1794,18 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, if (Conversion.XORValue) Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, MVT::i32)); + DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); if (Conversion.AddValue) Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, MVT::i32)); + DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); // The SHR/AND sequence should get optimized to an RISBG. 
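// Minimal scalar analogue (assumption: illustrative only, not DAG code) of the
// unsigned flavour of lowerMUL_LOHI32 above: extend both 32-bit operands to 64
// bits, multiply once, then split the product with a shift and two truncates.
#include <cassert>
#include <cstdint>

static void mulLoHi32(uint32_t A, uint32_t B, uint32_t &Hi, uint32_t &Lo) {
  uint64_t Mul = uint64_t(A) * uint64_t(B);  // single 64-bit multiply
  Hi = uint32_t(Mul >> 32);                  // SRL by 32, then truncate
  Lo = uint32_t(Mul);                        // plain truncate
}

int main() {
  uint32_t Hi, Lo;
  mulLoHi32(0xffffffffu, 2u, Hi, Lo);
  assert(Hi == 1 && Lo == 0xfffffffeu);
  return 0;
}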
Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, MVT::i32)); + DAG.getConstant(Conversion.Bit, DL, MVT::i32)); if (Conversion.Bit != 31) Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); return Result; } @@ -1689,24 +1816,23 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), - DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); + Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -1727,7 +1853,7 @@ static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); if (IsNegative) Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), - DAG.getConstant(0, Op.getValueType()), Op); + DAG.getConstant(0, DL, Op.getValueType()), Op); return Op; } @@ -1740,7 +1866,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). @@ -1775,18 +1901,14 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, if (!is32Bit(VT)) Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); // Sign-extend from the low bit. - SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32); + SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); } } - SmallVector Ops; - Ops.push_back(TrueOp); - Ops.push_back(FalseOp); - Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32)); - Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32)); - Ops.push_back(Glue); + SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); @@ -1826,11 +1948,58 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, // addition for it. 
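// Standalone illustration (assumption: not part of this patch) of the
// "sign-extend from the low bit" trick in lowerSELECT_CC above: a 0/1 value
// shifted left by width-1 and arithmetic-shifted back becomes 0 or -1, which
// materializes a -1/0 select result directly from the IPM-derived bit.
#include <cassert>
#include <cstdint>

static int64_t signExtendFromLowBit(uint64_t Bit) {
  // SHL then SRA by width-1; assumes arithmetic right shift, as SRA performs.
  return int64_t(Bit << 63) >> 63;
}

int main() {
  assert(signExtendFromLowBit(0) == 0);
  assert(signExtendFromLowBit(1) == -1);
  return 0;
}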
if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, - DAG.getConstant(Offset, PtrVT)); + DAG.getConstant(Offset, DL, PtrVT)); return Result; } +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. + Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. + return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); @@ -1838,33 +2007,94 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, EVT PtrVT = getPointerTy(); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); - // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); // The low part of the thread pointer is in access register 1. SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); // Merge them into a single 64-bit address. SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, - DAG.getConstant(32, PtrVT)); + DAG.getConstant(32, DL, PtrVT)); SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); - // Get the offset of GA from the thread pointer. - SystemZConstantPoolValue *CPV = - SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). 
+ SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); - // Force the offset into the constant pool and load it from there. - SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); - SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - CPAddr, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. + SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + DTPOffset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. + Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getGOT(), + false, false, false, 0); + break; + } + + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + break; + } + } // Add the base and offset together. 
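// Scalar sketch (assumption: illustrative only, not DAG code) of the thread
// pointer assembled above from access registers 0 and 1; the addition below
// then combines it with the model-specific offset.
#include <cassert>
#include <cstdint>

static uint64_t threadPointer(uint32_t TPHi, uint32_t TPLo) {
  return (uint64_t(TPHi) << 32) | TPLo;  // SHL by 32, then OR
}

int main() {
  assert(threadPointer(0x12345678u, 0x9abcdef0u) == 0x123456789abcdef0ull);
  return 0;
}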
return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); @@ -1926,7 +2156,7 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, } else { In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); } SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, @@ -1941,7 +2171,7 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::i32, Out64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); @@ -1962,8 +2192,8 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, // The initial values of each field. const unsigned NumFields = 4; SDValue Fields[NumFields] = { - DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT), - DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) }; @@ -1975,7 +2205,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SDValue FieldAddr = Addr; if (Offset != 0) FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, MachinePointerInfo(SV, Offset), false, false, 0); @@ -1993,8 +2223,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2049,7 +2280,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, // multiplication: // // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) - SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue C63 = DAG.getConstant(63, DL, MVT::i64); SDValue LL = Op.getOperand(0); SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); @@ -2187,6 +2418,47 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int64_t OrigBitSize = VT.getSizeInBits(); + SDLoc DL(Op); + + // Get the known-zero mask for the operand. + Op = Op.getOperand(0); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, DL, VT); + + // Skip known-zero high parts of the operand. + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. 
All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(BitSize - 8, DL, VT)); + + return Op; +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2233,23 +2505,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) if (auto *Const = dyn_cast(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; - Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType()); + Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); } // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Extend the source operand to 32 bits and prepare it for the inner loop. // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other @@ -2258,23 +2530,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, // bits must be set, while for other opcodes they should be left clear. if (Opcode != SystemZISD::ATOMIC_SWAPW) Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, - DAG.getConstant(32 - BitSize, WideVT)); + DAG.getConstant(32 - BitSize, DL, WideVT)); if (Opcode == SystemZISD::ATOMIC_LOADW_AND || Opcode == SystemZISD::ATOMIC_LOADW_NAND) Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, - DAG.getConstant(uint32_t(-1) >> BitSize, WideVT)); + DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); // Construct the ATOMIC_LOADW_* node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, - DAG.getConstant(BitSize, WideVT) }; + DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, NarrowVT, MMO); // Rotate the result of the final CS so that the field is in the lower // bits of a GR32, then truncate it. SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, - DAG.getConstant(BitSize, WideVT)); + DAG.getConstant(BitSize, DL, WideVT)); SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; @@ -2300,10 +2572,10 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, // available or the negative value is in the range of A(G)FHI. 
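// Standalone model (assumption: not part of this patch) of the byte-count
// summation in lowerCTPOP above.  popcntPerByte stands in for the POPCNT
// instruction, which leaves each byte's bit count in that byte; the shifted
// adds then accumulate all counts into the top byte.
#include <bitset>
#include <cassert>
#include <cstdint>

static uint64_t popcntPerByte(uint64_t V) {
  uint64_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= uint64_t(std::bitset<8>((V >> (8 * I)) & 0xff).count()) << (8 * I);
  return R;
}

static unsigned ctpop64(uint64_t V) {
  uint64_t Op = popcntPerByte(V);
  for (int I = 32; I >= 8; I /= 2)   // binary tree of shifted adds
    Op += Op << I;
  return unsigned(Op >> 56);         // overall result ends up in the high byte
}

int main() {
  assert(ctpop64(0) == 0);
  assert(ctpop64(~0ull) == 64);
  assert(ctpop64(0x8000000000000001ull) == 2);
  return 0;
}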
int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) - NegSrc2 = DAG.getConstant(Value, MemVT); + NegSrc2 = DAG.getConstant(Value, DL, MemVT); } else if (Subtarget.hasInterlockedAccess1()) // Use LAA(G) if available. - NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT), + NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); if (NegSrc2.getNode()) @@ -2342,23 +2614,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, - NegBitShift, DAG.getConstant(BitSize, WideVT) }; + NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); return AtomicOp; @@ -2387,19 +2659,45 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, // Just preserve the chain. return Op.getOperand(0); + SDLoc DL(Op); bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast(Op.getNode()); SDValue Ops[] = { Op.getOperand(0), - DAG.getConstant(Code, MVT::i32), + DAG.getConstant(Code, DL, MVT::i32), Op.getOperand(1) }; - return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. 
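// Standalone model (assumption: not part of this patch) of getCCResult below:
// IPM leaves the condition code in bits 28-29 of the 32-bit result and zeroes
// the two bits above it, so a logical shift right by SystemZ::IPM_CC (28)
// yields CC directly, with no masking needed.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned IPM_CC = 28;
  for (uint32_t CC = 0; CC < 4; ++CC) {
    uint32_t ProgramMask = 0x5;                       // arbitrary low nibble
    uint32_t IPMResult = (CC << IPM_CC) | (ProgramMask << 24);
    assert((IPMResult >> IPM_CC) == CC);              // SRL recovers CC
  }
  return 0;
}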
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDLoc DL(After); + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2437,6 +2735,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2471,6 +2771,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -2511,6 +2813,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -2552,9 +2857,10 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, Inner.getOperand(0)); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, - DAG.getConstant(NewShlAmt, ShiftVT)); + DAG.getConstant(NewShlAmt, SDLoc(Inner), + ShiftVT)); return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, - DAG.getConstant(NewSraAmt, ShiftVT)); + DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); } } } @@ -2614,8 +2920,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, MachineBasicBlock *MBB) const { - const SystemZInstrInfo *TII = static_cast( - MBB->getParent()->getTarget().getInstrInfo()); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); unsigned DestReg = MI->getOperand(0).getReg(); unsigned TrueReg = MI->getOperand(1).getReg(); @@ -2663,8 +2969,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, MachineBasicBlock *MBB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { - const SystemZInstrInfo *TII = static_cast( - MBB->getParent()->getTarget().getInstrInfo()); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); unsigned SrcReg = MI->getOperand(0).getReg(); MachineOperand Base = MI->getOperand(1); @@ -2733,7 +3039,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2803,14 +3109,10 @@ 
SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, unsigned Tmp = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp) .addReg(RotatedOldVal).addOperand(Src2); - if (BitSize < 32) + if (BitSize <= 32) // XILF with the upper BitSize bits set. BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) - .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize))); - else if (BitSize == 32) - // XILF with every bit set. - BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) - .addReg(Tmp).addImm(~uint32_t(0)); + .addReg(Tmp).addImm(-1U << (32 - BitSize)); else { // Use LCGR and add -1 to the result, which is more compact than // an XILF, XILH pair. @@ -2857,7 +3159,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, unsigned BitSize) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2969,7 +3271,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. @@ -3086,7 +3388,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, bool ClearEven, unsigned SubReg) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3118,7 +3420,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3288,7 +3590,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); + static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3342,6 +3644,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, return DoneMBB; } +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. 
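// Quick standalone check (assumption: not part of this patch) of the unified
// XILF immediate in emitAtomicLoadBinary above: -1U << (32 - BitSize) has the
// top BitSize bits set, and for BitSize == 32 the shift count is zero, so the
// old special case that XOR'ed with every bit set is covered by the same
// expression.
#include <cassert>
#include <cstdint>

static uint32_t xilfImmediate(unsigned BitSize) {
  return -1U << (32 - BitSize);      // valid for BitSize in [1, 32]
}

int main() {
  assert(xilfImmediate(8) == 0xff000000u);
  assert(xilfImmediate(16) == 0xffff0000u);
  assert(xilfImmediate(32) == 0xffffffffu);
  return 0;
}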
+ for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR clobbers. + if (!NoFloat && (Control & 4) != 0) { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -3583,6 +3929,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); default: llvm_unreachable("Unexpected instr type to insert"); }
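// Standalone sketch (assumption: illustrative only, not part of this patch) of
// the save-mask bookkeeping in emitTransactionBegin above: the bit for the
// r14/r15 pair is always forced on so the stack pointer survives an abort, the
// r10/r11 bit is forced on when a frame pointer is in use, and every GPR pair
// whose bit stays clear is the one that receives explicit clobber operands.
#include <cassert>
#include <cstdint>

static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100};

int main() {
  uint64_t Control = 0;              // caller asked for no GPRs to be saved
  bool HasFP = true;                 // assume a frame pointer is in use
  Control |= GPRControlBit[15];      // always preserve the stack pointer pair
  if (HasFP)
    Control |= GPRControlBit[11];    // and the frame pointer pair
  assert(Control == (0x0100 | 0x0400));

  unsigned NumClobberedPairs = 0;
  for (int I = 0; I < 16; I += 2)    // one control bit per even/odd pair
    if ((Control & GPRControlBit[I]) == 0)
      ++NumClobberedPairs;
  assert(NumClobberedPairs == 6);    // r0-r9 and r12-r13 are clobbered
  return 0;
}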