X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FSystemZ%2FSystemZISelLowering.cpp;h=b1036a2e4739faf0432211231e19908473f2d63e;hb=1f7a90d7936a9a6278365ea8b0521c7ff17432c3;hp=9cce34fa386dc7a9f3fa992bdaaf98c3072fac11;hpb=b653ed85457de149bb42c22a8098e6b2f2685dd4;p=oota-llvm.git diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 9cce34fa386..b1036a2e473 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-lower" - #include "SystemZISelLowering.h" #include "SystemZCallingConv.h" #include "SystemZConstantPoolValue.h" @@ -22,10 +20,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Intrinsics.h" #include using namespace llvm; +#define DEBUG_TYPE "systemz-lower" + namespace { // Represents a sequence for extracting a 0/1 value from an IPM result: // (((X ^ XORValue) + AddValue) >> Bit) @@ -80,9 +81,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { return Op; } -SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) - : TargetLowering(tm, new TargetLoweringObjectFileELF()), - Subtarget(*tm.getSubtargetImpl()), TM(tm) { +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, + const SystemZSubtarget &STI) + : TargetLowering(tm), Subtarget(STI) { MVT PtrVT = getPointerTy(); // Set up the register classes. @@ -96,7 +97,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); // Compute derived properties from the register classes - computeRegisterProperties(); + computeRegisterProperties(Subtarget.getRegisterInfo()); // Set up special registers. setExceptionPointerRegister(SystemZ::R6D); @@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + // No special instructions for these. - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -176,8 +182,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::SMUL_LOHI, VT, Custom); setOperationAction(ISD::UMUL_LOHI, VT, Custom); - // We have instructions for signed but not unsigned FP conversion. - setOperationAction(ISD::FP_TO_UINT, VT, Expand); + // Only z196 and above have native support for conversions to unsigned. + if (!Subtarget.hasFPExtension()) + setOperationAction(ISD::FP_TO_UINT, VT, Expand); } } @@ -197,10 +204,12 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); - // We have instructions for signed but not unsigned FP conversion. + // z10 has instructions for signed but not unsigned FP conversion. // Handle unsigned 32-bit types as signed 64-bit types. 
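An aside to make the promotion concrete (illustration only, not part of the patch; the helper name is hypothetical): zero-extending an unsigned 32-bit value into a signed 64-bit integer always yields a non-negative number, so the signed 64-bit conversion produces the exact unsigned result.

#include <cstdint>

// u32 -> double via the signed i64 path that the Promote action selects.
double u32ToDouble(uint32_t X) {
  int64_t Wide = static_cast<int64_t>(static_cast<uint64_t>(X)); // always >= 0
  return static_cast<double>(Wide); // e.g. 0xFFFFFFFF -> 4294967295.0
}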
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + if (!Subtarget.hasFPExtension()) { + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + } // We have native support for a 64-bit CTLZ, via FLOGR. setOperationAction(ISD::CTLZ, MVT::i32, Promote); @@ -209,19 +218,18 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // Give LowerOperation the chance to optimize SIGN_EXTEND sequences. - setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); - // FIXME: Can we support these natively? setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); // We have native instructions for i8, i16 and i32 extensions, but not i1. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + } // Handle the various types of symbolic address. setOperationAction(ISD::ConstantPool, PtrVT, Custom); @@ -275,7 +283,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant // would fit in an f80). - setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); // Floating-point truncation and stores need to be done separately. setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -293,6 +302,12 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine(ISD::SIGN_EXTEND); + + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; @@ -336,9 +351,20 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } -bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, - unsigned, - bool *Fast) const { +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + +bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. 
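The body of the renamed hook is not touched by this hunk; a sketch of its effect, assuming the behaviour the comment above describes (standalone mock, not code from this file):

// SystemZ accepts any misalignment and never reports it as slow.
bool allowsMisalignedSketch(bool *Fast) {
  if (Fast)
    *Fast = true; // unaligned is never slower than the expanded version
  return true;
}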
@@ -421,7 +447,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -444,31 +470,31 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, break; case 'I': // Unsigned 8-bit constant - if (ConstantInt *C = dyn_cast(CallOperandVal)) + if (auto *C = dyn_cast(CallOperandVal)) if (isUInt<8>(C->getZExtValue())) weight = CW_Constant; break; case 'J': // Unsigned 12-bit constant - if (ConstantInt *C = dyn_cast(CallOperandVal)) + if (auto *C = dyn_cast(CallOperandVal)) if (isUInt<12>(C->getZExtValue())) weight = CW_Constant; break; case 'K': // Signed 16-bit constant - if (ConstantInt *C = dyn_cast(CallOperandVal)) + if (auto *C = dyn_cast(CallOperandVal)) if (isInt<16>(C->getSExtValue())) weight = CW_Constant; break; case 'L': // Signed 20-bit displacement (on all targets we support) - if (ConstantInt *C = dyn_cast(CallOperandVal)) + if (auto *C = dyn_cast(CallOperandVal)) if (isInt<20>(C->getSExtValue())) weight = CW_Constant; break; case 'M': // 0x7fffffff - if (ConstantInt *C = dyn_cast(CallOperandVal)) + if (auto *C = dyn_cast(CallOperandVal)) if (C->getZExtValue() == 0x7fffffff) weight = CW_Constant; break; @@ -489,11 +515,13 @@ parseRegisterNumber(const std::string &Constraint, if (Index < 16 && Map[Index]) return std::make_pair(Map[Index], RC); } - return std::make_pair(0u, static_cast(0)); + return std::make_pair(0U, nullptr); } -std::pair SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { +std::pair +SystemZTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -550,7 +578,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { SystemZMC::FP64Regs); } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } void SystemZTargetLowering:: @@ -561,37 +589,37 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, if (Constraint.length() == 1) { switch (Constraint[0]) { case 'I': // Unsigned 8-bit constant - if (ConstantSDNode *C = dyn_cast(Op)) + if (auto *C = dyn_cast(Op)) if (isUInt<8>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'J': // Unsigned 12-bit constant - if (ConstantSDNode *C = dyn_cast(Op)) + if (auto *C = dyn_cast(Op)) if (isUInt<12>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'K': // Signed 16-bit constant - if (ConstantSDNode *C = dyn_cast(Op)) + if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) - if (ConstantSDNode *C = dyn_cast(Op)) + if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + 
Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff - if (ConstantSDNode *C = dyn_cast(Op)) + if (auto *C = dyn_cast(Op)) if (C->getZExtValue() == 0x7fffffff) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; } @@ -669,13 +697,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = - MF.getInfo(); - const SystemZFrameLowering *TFL = - static_cast(TM.getFrameLowering()); + MF.getInfo(); + auto *TFL = + static_cast(Subtarget.getFrameLowering()); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); unsigned NumFixedGPRs = 0; @@ -725,7 +753,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, EVT PtrVT = getPointerTy(); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -770,15 +799,15 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, } // Join the stores, which are independent of one another. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &MemOps[NumFixedFPRs], - SystemZ::NumArgFPRs - NumFixedFPRs); + makeArrayRef(&MemOps[NumFixedFPRs], + SystemZ::NumArgFPRs-NumFixedFPRs)); } } return Chain; } -static bool canUseSiblingCall(CCState ArgCCInfo, +static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl &ArgLocs) { // Punt if there are any indirect or stack arguments, or if the call // needs the call-saved argument register R6. @@ -813,7 +842,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -826,7 +855,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the start of the call. if (!IsTailCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), DL); // Copy argument values to their designated locations. @@ -862,7 +892,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); // Emit the store. MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, @@ -873,17 +903,16 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Join the stores, which are independent of one another. 
if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // Accept direct calls by converting symbolic call addresses to the // associated Target* opcodes. Force %r1 to be used for indirect // tail calls. SDValue Glue; - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (auto *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); - } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { + } else if (auto *E = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (IsTailCall) { @@ -910,6 +939,12 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[I].first, RegsToPass[I].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + // Glue the call to the argument copies, if any. if (Glue.getNode()) Ops.push_back(Glue); @@ -917,20 +952,20 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (IsTailCall) - return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size()); - Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); + Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, PtrVT, true), - DAG.getConstant(0, PtrVT, true), + DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. @@ -961,7 +996,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Assign locations to each returned value. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); // Quick exit for void returns @@ -994,8 +1029,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, if (Glue.getNode()) RetOps.push_back(Glue); - return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); } SDValue SystemZTargetLowering:: @@ -1003,6 +1037,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. 
Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1100,11 +1181,11 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { // If C can be converted to a comparison against zero, adjust the operands // as necessary. -static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; - ConstantSDNode *ConstOp1 = dyn_cast(C.Op1.getNode()); + auto *ConstOp1 = dyn_cast(C.Op1.getNode()); if (!ConstOp1) return; @@ -1114,13 +1195,13 @@ static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { C.CCMask ^= SystemZ::CCMASK_CMP_EQ; - C.Op1 = DAG.getConstant(0, C.Op1.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); } } // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. -static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. if (!C.Op0.hasOneUse() || @@ -1129,14 +1210,14 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { return; // We must have an 8- or 16-bit load. - LoadSDNode *Load = cast(C.Op0); + auto *Load = cast(C.Op0); unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); if (NumBits != 8 && NumBits != 16) return; // The load must be an extending one and the constant must be within the // range of the unextended value. 
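A concrete instance of that range rule (hypothetical helper, not part of the patch): for a sign-extending byte load, an equality test against a constant can become an unsigned compare of the raw byte, provided the constant fits the unextended byte.

#include <cassert>
#include <cstdint>

bool equalsViaRawByte(int8_t Loaded, int64_t Constant) {
  assert(Constant >= -128 && Constant < 128 && "must fit the unextended byte");
  uint8_t Raw = static_cast<uint8_t>(Loaded);          // what CLI actually sees
  return Raw == static_cast<uint8_t>(Constant & 0xFF); // same as Loaded == Constant
}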
- ConstantSDNode *ConstOp1 = cast(C.Op1); + auto *ConstOp1 = cast(C.Op1); uint64_t Value = ConstOp1->getZExtValue(); uint64_t Mask = (1 << NumBits) - 1; if (Load->getExtensionType() == ISD::SEXTLOAD) { @@ -1180,18 +1261,18 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->isVolatile(), Load->isNonTemporal(), - Load->getAlignment()); + Load->isInvariant(), Load->getAlignment()); // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) - C.Op1 = DAG.getConstant(Value, MVT::i32); + C.Op1 = DAG.getConstant(Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable // for integer register-memory comparisons of type ICmpType. static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { - LoadSDNode *Load = dyn_cast(Op.getNode()); + auto *Load = dyn_cast(Op.getNode()); if (Load) { // There are no instructions to compare a register with a memory byte. if (Load->getMemoryVT() == MVT::i8) @@ -1225,7 +1306,7 @@ static bool shouldSwapCmpOperands(const Comparison &C) { // Never swap comparisons with zero since there are many ways to optimize // those later. - ConstantSDNode *ConstOp1 = dyn_cast(C.Op1); + auto *ConstOp1 = dyn_cast(C.Op1); if (ConstOp1 && ConstOp1->getZExtValue() == 0) return false; @@ -1281,7 +1362,7 @@ static unsigned reverseCCMask(unsigned CCMask) { // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. -static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { +static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -1290,7 +1371,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { C.Op0 = SDValue(N, 0); - C.Op1 = DAG.getConstant(0, N->getValueType(0)); + C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); return; } } @@ -1302,7 +1383,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { // negation to set CC, so avoiding separate LOAD AND TEST and // LOAD (NEGATIVE/COMPLEMENT) instructions. static void adjustForFNeg(Comparison &C) { - ConstantFPSDNode *C1 = dyn_cast(C.Op1); + auto *C1 = dyn_cast(C.Op1); if (C1 && C1->isZero()) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { SDNode *N = *I; @@ -1327,7 +1408,7 @@ static void adjustForLTGFR(Comparison &C) { C.Op0.getValueType() == MVT::i64 && C.Op1.getOpcode() == ISD::Constant && cast(C.Op1)->getZExtValue() == 0) { - ConstantSDNode *C1 = dyn_cast(C.Op0.getOperand(1)); + auto *C1 = dyn_cast(C.Op0.getOperand(1)); if (C1 && C1->getZExtValue() == 32) { SDValue ShlOp0 = C.Op0.getOperand(0); // See whether X has any SIGN_EXTEND_INREG uses. @@ -1346,19 +1427,19 @@ static void adjustForLTGFR(Comparison &C) { // If C compares the truncation of an extending load, try to compare // the untruncated value instead. This exposes more opportunities to // reuse CC. 
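A scalar model of the equivalence this exploits (illustration only): when a narrow value was zero- or sign-extended by the load, truncating it back cannot change a comparison with zero, so the wide value can be compared directly.

#include <cstdint>

// Assumes Extended came from a zero-extending i8 load, as checked below.
bool truncIsZero(uint64_t Extended) {
  return static_cast<uint32_t>(Extended) == 0; // equal to (Extended == 0)
}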
-static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
+static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
       C.Op1.getOpcode() == ISD::Constant &&
       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
-    LoadSDNode *L = cast<LoadSDNode>(C.Op0.getOperand(0));
+    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
     if (L->getMemoryVT().getStoreSizeInBits()
         <= C.Op0.getValueType().getSizeInBits()) {
       unsigned Type = L->getExtensionType();
       if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
           (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
         C.Op0 = C.Op0.getOperand(0);
-        C.Op1 = DAG.getConstant(0, C.Op0.getValueType());
+        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
       }
     }
   }
 }
@@ -1367,7 +1448,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
 // Return true if shift operation N has an in-range constant shift value.
 // Store it in ShiftVal if so.
 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
-  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
   if (!Shift)
     return false;
@@ -1477,9 +1558,9 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
 
 // See whether C can be implemented as a TEST UNDER MASK instruction.
 // Update the arguments with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
+static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
   // Check that we have a comparison with a constant.
-  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
+  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
   if (!ConstOp1)
     return;
   uint64_t CmpVal = ConstOp1->getZExtValue();
@@ -1487,7 +1568,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
   // Check whether the nonconstant input is an AND with a constant mask.
   Comparison NewC(C);
   uint64_t MaskVal;
-  ConstantSDNode *Mask = 0;
+  ConstantSDNode *Mask = nullptr;
   if (C.Op0.getOpcode() == ISD::AND) {
     NewC.Op0 = C.Op0.getOperand(0);
     NewC.Op1 = C.Op0.getOperand(1);
@@ -1517,6 +1598,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
     MaskVal = -(CmpVal & -CmpVal);
     NewC.ICmpType = SystemZICMP::UnsignedOnly;
   }
+  if (!MaskVal)
+    return;
 
   // Check whether the combination of mask, comparison value and comparison
   // type are suitable.
@@ -1553,14 +1636,58 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
   if (Mask && Mask->getZExtValue() == MaskVal)
     C.Op1 = SDValue(Mask, 0);
   else
-    C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType());
+    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
   C.CCValid = SystemZ::CCMASK_TM;
   C.CCMask = NewCCMask;
 }
 
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+                                  SDValue Call, unsigned CCValid, uint64_t CC,
+                                  ISD::CondCode Cond) {
+  Comparison C(Call, SDValue());
+  C.Opcode = Opcode;
+  C.CCValid = CCValid;
+  if (Cond == ISD::SETEQ)
+    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+  else if (Cond == ISD::SETNE)
+    // ...and the inverse of that.
+    C.CCMask = CC < 4 ?
~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond) { + ISD::CondCode Cond, SDLoc DL) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1584,11 +1711,11 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; - adjustZeroCmp(DAG, C); - adjustSubwordCmp(DAG, C); - adjustForSubtraction(DAG, C); + adjustZeroCmp(DAG, DL, C); + adjustSubwordCmp(DAG, DL, C); + adjustForSubtraction(DAG, DL, C); adjustForLTGFR(C); - adjustICmpTruncate(DAG, C); + adjustICmpTruncate(DAG, DL, C); } if (shouldSwapCmpOperands(C)) { @@ -1596,20 +1723,31 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, C.CCMask = reverseCCMask(C.CCMask); } - adjustForTestUnderMask(DAG, C); + adjustForTestUnderMask(DAG, DL, C); return C; } // Emit the comparison instruction described by C. 
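Before the emitter itself, a worked example of the CC-mask arithmetic in getIntrinsicCmp above (hypothetical helper, not part of the patch): bit (3 - CC) of the 4-bit mask selects condition code CC.

unsigned maskForLessThan(unsigned CC) { // the SETLT/SETULT case
  return CC < 4 ? (0xFu << (4 - CC)) & 0xFu : 0xFu;
}
// maskForLessThan(2) == 0b1100: true exactly for condition codes 0 and 1.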
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, MVT::i32)); + DAG.getConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(RegisterOnly, MVT::i32)); + DAG.getConstant(RegisterOnly, DL, MVT::i32)); } return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); } @@ -1623,7 +1761,8 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); - Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, DL, MVT::i64)); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } @@ -1655,18 +1794,18 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, if (Conversion.XORValue) Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, MVT::i32)); + DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); if (Conversion.AddValue) Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, MVT::i32)); + DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); // The SHR/AND sequence should get optimized to an RISBG. 
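A scalar model tying together the XOR/ADD applied above and the SRL/AND emitted below, per the IPMConversion comment near the top of the file (illustration only):

#include <cstdint>

uint32_t ipmToBool(uint32_t IPM, uint32_t XORValue, uint32_t AddValue,
                   unsigned Bit) {
  uint32_t R = (IPM ^ XORValue) + AddValue; // XOR/ADD steps, identity when 0
  R >>= Bit;                                // the ISD::SRL below
  return Bit != 31 ? (R & 1)                // the ISD::AND below
                   : R;                     // bit 31: the shift alone isolates it
}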
Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, MVT::i32)); + DAG.getConstant(Conversion.Bit, DL, MVT::i32)); if (Conversion.Bit != 31) Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); return Result; } @@ -1677,24 +1816,23 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), - DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); + Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -1715,7 +1853,7 @@ static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); if (IsNegative) Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), - DAG.getConstant(0, Op.getValueType()), Op); + DAG.getConstant(0, DL, Op.getValueType()), Op); return Op; } @@ -1728,7 +1866,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). @@ -1748,8 +1886,8 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, // Special case for handling -1/0 results. The shifts we use here // should get optimized with the IPM conversion sequence. - ConstantSDNode *TrueC = dyn_cast(TrueOp); - ConstantSDNode *FalseC = dyn_cast(FalseOp); + auto *TrueC = dyn_cast(TrueOp); + auto *FalseC = dyn_cast(FalseOp); if (TrueC && FalseC) { int64_t TrueVal = TrueC->getSExtValue(); int64_t FalseVal = FalseC->getSExtValue(); @@ -1763,21 +1901,17 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, if (!is32Bit(VT)) Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); // Sign-extend from the low bit. 
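A scalar model of the SHL/SRA pair built below (illustration only; it relies on an arithmetic right shift, which is what the generated code uses):

#include <cstdint>

int32_t signExtendFromLowBit(uint32_t X) {
  return static_cast<int32_t>(X << 31) >> 31; // 1 -> -1, 0 -> 0
}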
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32); + SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); } } - SmallVector Ops; - Ops.push_back(TrueOp); - Ops.push_back(FalseOp); - Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32)); - Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32)); - Ops.push_back(Glue); + SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, @@ -1786,8 +1920,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(); - Reloc::Model RM = TM.getRelocationModel(); - CodeModel::Model CM = TM.getCodeModel(); + Reloc::Model RM = DAG.getTarget().getRelocationModel(); + CodeModel::Model CM = DAG.getTarget().getCodeModel(); SDValue Result; if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { @@ -1814,45 +1948,153 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, - DAG.getConstant(Offset, PtrVT)); + DAG.getConstant(Offset, DL, PtrVT)); return Result; } +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. + Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. 
+ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(); - TLSModel::Model model = TM.getTLSModel(GV); - - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); // The low part of the thread pointer is in access register 1. SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); // Merge them into a single 64-bit address. SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, - DAG.getConstant(32, PtrVT)); + DAG.getConstant(32, DL, PtrVT)); SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); - // Get the offset of GA from the thread pointer. - SystemZConstantPoolValue *CPV = - SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); + + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. + SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + DTPOffset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. 
+ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getGOT(), + false, false, false, 0); + break; + } - // Force the offset into the constant pool and load it from there. - SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); - SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - CPAddr, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + break; + } + } // Add the base and offset together. return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); @@ -1914,7 +2156,7 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, } else { In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); } SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, @@ -1929,7 +2171,7 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::i32, Out64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); @@ -1950,8 +2192,8 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, // The initial values of each field. 
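For reference, the four fields correspond to the s390x ELF ABI va_list layout (a sketch per the ABI, not code from this file):

struct VaListSketch {
  long GPRIndex;         // next GPR argument (VarArgsFirstGPR)
  long FPRIndex;         // next FPR argument (VarArgsFirstFPR)
  void *OverflowArgArea; // stack overflow area (VarArgsFrameIndex)
  void *RegSaveArea;     // register save area (RegSaveFrameIndex)
};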
const unsigned NumFields = 4; SDValue Fields[NumFields] = { - DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT), - DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) }; @@ -1963,13 +2205,13 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SDValue FieldAddr = Addr; if (Offset != 0) FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, MachinePointerInfo(SV, Offset), false, false, 0); Offset += 8; } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, @@ -1981,8 +2223,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2010,7 +2253,7 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); SDValue Ops[2] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, @@ -2037,7 +2280,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, // multiplication: // // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) - SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue C63 = DAG.getConstant(63, DL, MVT::i64); SDValue LL = Op.getOperand(0); SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); @@ -2052,7 +2295,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); } - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, @@ -2071,7 +2314,7 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, // low half first, so the results are in reverse order. 
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, @@ -2098,7 +2341,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Ops[2]; lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, Op0, Op1, Ops[1], Ops[0]); - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, @@ -2116,7 +2359,7 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, else lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { @@ -2125,8 +2368,8 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // Get the known-zero masks for each operand. SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; APInt KnownZero[2], KnownOne[2]; - DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]); - DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]); + DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); + DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); // See if the upper 32 bits of one operand and the lower 32 bits of the // other are known zero. They are the low and high operands respectively. @@ -2175,40 +2418,51 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } -SDValue SystemZTargetLowering::lowerSIGN_EXTEND(SDValue Op, - SelectionDAG &DAG) const { - // Convert (sext (ashr (shl X, C1), C2)) to - // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as - // cheap as narrower ones. - SDValue N0 = Op.getOperand(0); +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { - ConstantSDNode *SraAmt = dyn_cast(N0.getOperand(1)); - SDValue Inner = N0.getOperand(0); - if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { - ConstantSDNode *ShlAmt = dyn_cast(Inner.getOperand(1)); - if (ShlAmt) { - unsigned Extra = (VT.getSizeInBits() - - N0.getValueType().getSizeInBits()); - unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; - unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; - EVT ShiftVT = N0.getOperand(1).getValueType(); - SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, - Inner.getOperand(0)); - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, - DAG.getConstant(NewShlAmt, ShiftVT)); - return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, - DAG.getConstant(NewSraAmt, ShiftVT)); - } - } + int64_t OrigBitSize = VT.getSizeInBits(); + SDLoc DL(Op); + + // Get the known-zero mask for the operand. + Op = Op.getOperand(0); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, DL, VT); + + // Skip known-zero high parts of the operand. + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. 
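A self-contained scalar model of the lowering below (illustration only): simulate the byte-wise POPCNT, then fold the per-byte counts up a shift/add tree and read the total from the high byte, as the full 64-bit case does.

#include <cstdint>

unsigned popcount64Model(uint64_t Op) {
  uint64_t Counts = 0;
  for (int I = 0; I < 8; ++I) {           // what POPCNT computes per byte
    uint64_t Byte = (Op >> (8 * I)) & 0xFF;
    Byte = Byte - ((Byte >> 1) & 0x55);
    Byte = (Byte & 0x33) + ((Byte >> 2) & 0x33);
    Byte = (Byte + (Byte >> 4)) & 0x0F;
    Counts |= Byte << (8 * I);
  }
  for (int I = 32; I >= 8; I /= 2)        // the shift/add tree below
    Counts += Counts << I;
  return unsigned(Counts >> 56);          // extract from the high byte
}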
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); } - return SDValue(); + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(BitSize - 8, DL, VT)); + + return Op; } // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { - AtomicSDNode *Node = cast(Op.getNode()); + auto *Node = cast(Op.getNode()); return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), Node->getChain(), Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); @@ -2218,7 +2472,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, // by a serialization. SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { - AtomicSDNode *Node = cast(Op.getNode()); + auto *Node = cast(Op.getNode()); SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); @@ -2231,7 +2485,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, unsigned Opcode) const { - AtomicSDNode *Node = cast(Op.getNode()); + auto *Node = cast(Op.getNode()); // 32-bit operations need no code outside the main loop. EVT NarrowVT = Node->getMemoryVT(); @@ -2249,25 +2503,25 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, // Convert atomic subtracts of constants into additions. if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) - if (ConstantSDNode *Const = dyn_cast(Src2)) { + if (auto *Const = dyn_cast(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; - Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType()); + Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); } // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Extend the source operand to 32 bits and prepare it for the inner loop. // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other @@ -2276,28 +2530,27 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, // bits must be set, while for other opcodes they should be left clear. 
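A scalar model of that operand set-up for the non-swap opcodes (illustration only; BitSize is 8 or 16 on this path):

#include <cassert>
#include <cstdint>

uint32_t prepareSubwordSrc2(uint32_t Src2, unsigned BitSize, bool IsAndLike) {
  assert(BitSize == 8 || BitSize == 16);
  Src2 <<= (32 - BitSize);           // move the field to the top bits
  if (IsAndLike)
    Src2 |= uint32_t(-1) >> BitSize; // all-ones low bits for AND/NAND
  return Src2;
}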
if (Opcode != SystemZISD::ATOMIC_SWAPW) Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, - DAG.getConstant(32 - BitSize, WideVT)); + DAG.getConstant(32 - BitSize, DL, WideVT)); if (Opcode == SystemZISD::ATOMIC_LOADW_AND || Opcode == SystemZISD::ATOMIC_LOADW_NAND) Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, - DAG.getConstant(uint32_t(-1) >> BitSize, WideVT)); + DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); // Construct the ATOMIC_LOADW_* node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, - DAG.getConstant(BitSize, WideVT) }; + DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, - array_lengthof(Ops), NarrowVT, MMO); // Rotate the result of the final CS so that the field is in the lower // bits of a GR32, then truncate it. SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, - DAG.getConstant(BitSize, WideVT)); + DAG.getConstant(BitSize, DL, WideVT)); SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; - return DAG.getMergeValues(RetOps, 2, DL); + return DAG.getMergeValues(RetOps, DL); } // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations @@ -2305,7 +2558,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, // operations into additions. SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const { - AtomicSDNode *Node = cast(Op.getNode()); + auto *Node = cast(Op.getNode()); EVT MemVT = Node->getMemoryVT(); if (MemVT == MVT::i32 || MemVT == MVT::i64) { // A full-width operation. @@ -2314,15 +2567,15 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, SDValue NegSrc2; SDLoc DL(Src2); - if (ConstantSDNode *Op2 = dyn_cast(Src2)) { + if (auto *Op2 = dyn_cast(Src2)) { // Use an addition if the operand is constant and either LAA(G) is // available or the negative value is in the range of A(G)FHI. int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); - if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1()) - NegSrc2 = DAG.getConstant(Value, MemVT); - } else if (TM.getSubtargetImpl()->hasInterlockedAccess1()) + if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) + NegSrc2 = DAG.getConstant(Value, DL, MemVT); + } else if (Subtarget.hasInterlockedAccess1()) // Use LAA(G) if available. - NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT), + NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); if (NegSrc2.getNode()) @@ -2342,7 +2595,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, // into a fullword ATOMIC_CMP_SWAPW operation. SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const { - AtomicSDNode *Node = cast(Op.getNode()); + auto *Node = cast(Op.getNode()); // We have native support for 32-bit compare and swap. EVT NarrowVT = Node->getMemoryVT(); @@ -2361,26 +2614,25 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. 
SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, - NegBitShift, DAG.getConstant(BitSize, WideVT) }; + NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, - VTList, Ops, array_lengthof(Ops), - NarrowVT, MMO); + VTList, Ops, NarrowVT, MMO); return AtomicOp; } @@ -2407,19 +2659,45 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, // Just preserve the chain. return Op.getOperand(0); + SDLoc DL(Op); bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; - MemIntrinsicSDNode *Node = cast(Op.getNode()); + auto *Node = cast(Op.getNode()); SDValue Ops[] = { Op.getOperand(0), - DAG.getConstant(Code, MVT::i32), + DAG.getConstant(Code, DL, MVT::i32), Op.getOperand(1) }; - return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), - Node->getVTList(), Ops, array_lengthof(Ops), + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, + Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. +static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDLoc DL(After); + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2457,8 +2735,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); - case ISD::SIGN_EXTEND: - return lowerSIGN_EXTEND(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2493,6 +2771,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -2533,6 +2813,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); OPCODE(ATOMIC_SWAPW); 
@@ -2533,6 +2813,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
   OPCODE(SEARCH_STRING);
   OPCODE(IPM);
   OPCODE(SERIALIZE);
+  OPCODE(TBEGIN);
+  OPCODE(TBEGIN_NOFLOAT);
+  OPCODE(TEND);
   OPCODE(ATOMIC_SWAPW);
   OPCODE(ATOMIC_LOADW_ADD);
   OPCODE(ATOMIC_LOADW_SUB);
@@ -2547,10 +2830,44 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
   OPCODE(ATOMIC_CMP_SWAPW);
   OPCODE(PREFETCH);
   }
-  return NULL;
+  return nullptr;
 #undef OPCODE
 }
 
+SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned Opcode = N->getOpcode();
+  if (Opcode == ISD::SIGN_EXTEND) {
+    // Convert (sext (ashr (shl X, C1), C2)) to
+    // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
+    // cheap as narrower ones.
+    SDValue N0 = N->getOperand(0);
+    EVT VT = N->getValueType(0);
+    if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
+      auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+      SDValue Inner = N0.getOperand(0);
+      if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
+        if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
+          unsigned Extra = (VT.getSizeInBits() -
+                            N0.getValueType().getSizeInBits());
+          unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
+          unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
+          EVT ShiftVT = N0.getOperand(1).getValueType();
+          SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
+                                    Inner.getOperand(0));
+          SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
+                                    DAG.getConstant(NewShlAmt, SDLoc(Inner),
+                                                    ShiftVT));
+          return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
+                             DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
+        }
+      }
+    }
+  }
+  return SDValue();
+}
+
 //===----------------------------------------------------------------------===//
 // Custom insertion
 //===----------------------------------------------------------------------===//
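The SIGN_EXTEND combine above relies on the identity that widening both shift amounts by Extra bits makes the final arithmetic shift perform the sign extension itself, so the extension node disappears. A self-contained check of the i32 to i64 case, for exposition only (narrowForm and wideForm are hypothetical names; >> on a negative int64_t is assumed to shift arithmetically, as it does on every mainstream compiler):

#include <cassert>
#include <cstdint>

// sext_i64((X << C1) a>> C2), computed in i32 as the unconverted DAG does.
static int64_t narrowForm(int32_t X, unsigned C1, unsigned C2) {
  int32_t Shifted = int32_t(uint32_t(X) << C1) >> C2;
  return int64_t(Shifted);
}

// ((anyext X) << (C1 + 32)) a>> (C2 + 32), computed in i64. The shl
// pushes the anyext's undefined upper bits out of the register, which
// is why ANY_EXTEND is good enough in the combine.
static int64_t wideForm(int32_t X, unsigned C1, unsigned C2) {
  uint64_t Ext = uint32_t(X); // one valid choice of anyext
  return int64_t(Ext << (C1 + 32)) >> (C2 + 32);
}

int main() {
  for (int32_t X : {0, 1, -1, 42, -12345, INT32_MIN, INT32_MAX})
    for (unsigned C1 = 0; C1 < 32; ++C1)
      for (unsigned C2 = 0; C2 < 32; ++C2)
        assert(narrowForm(X, C1, C2) == wideForm(X, C1, C2));
  return 0;
}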
@@ -2603,7 +2920,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
 MachineBasicBlock *
 SystemZTargetLowering::emitSelect(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
 
   unsigned DestReg = MI->getOperand(0).getReg();
   unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2651,7 +2969,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
                                      MachineBasicBlock *MBB,
                                      unsigned StoreOpcode, unsigned STOCOpcode,
                                      bool Invert) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
 
   unsigned SrcReg = MI->getOperand(0).getReg();
   MachineOperand Base = MI->getOperand(1);
@@ -2666,7 +2985,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
   // Use STOCOpcode if possible.  We could use different store patterns in
   // order to avoid matching the index register, but the performance trade-offs
   // might be more complicated in that case.
-  if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
     if (Invert)
       CCMask ^= CCValid;
     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
@@ -2718,8 +3037,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
                                             unsigned BinOpcode,
                                             unsigned BitSize,
                                             bool Invert) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
   MachineFunction &MF = *MBB->getParent();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
   bool IsSubWord = (BitSize < 32);
@@ -2789,14 +3109,10 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
     unsigned Tmp = MRI.createVirtualRegister(RC);
     BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
       .addReg(RotatedOldVal).addOperand(Src2);
-    if (BitSize < 32)
+    if (BitSize <= 32)
       // XILF with the upper BitSize bits set.
       BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
-        .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
-    else if (BitSize == 32)
-      // XILF with every bit set.
-      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
-        .addReg(Tmp).addImm(~uint32_t(0));
+        .addReg(Tmp).addImm(-1U << (32 - BitSize));
     else {
       // Use LCGR and add -1 to the result, which is more compact than
       // an XILF, XILH pair.
@@ -2841,8 +3157,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
                                             unsigned CompareOpcode,
                                             unsigned KeepOldMask,
                                             unsigned BitSize) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
   MachineFunction &MF = *MBB->getParent();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
   bool IsSubWord = (BitSize < 32);
@@ -2952,8 +3269,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
 MachineBasicBlock *
 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
                                           MachineBasicBlock *MBB) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
   MachineFunction &MF = *MBB->getParent();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Extract the operands.  Base can be a register or a frame index.
@@ -3068,8 +3386,9 @@
 MachineBasicBlock *
 SystemZTargetLowering::emitExt128(MachineInstr *MI,
                                   MachineBasicBlock *MBB,
                                   bool ClearEven, unsigned SubReg) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
   MachineFunction &MF = *MBB->getParent();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -3099,8 +3418,9 @@
 MachineBasicBlock *
 SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
   MachineFunction &MF = *MBB->getParent();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -3113,7 +3433,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
   // When generating more than one CLC, all but the last will need to
   // branch to the end when a difference is found.
   MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
-                               splitBlockAfter(MI, MBB) : 0);
+                               splitBlockAfter(MI, MBB) : nullptr);
 
   // Check for the loop form, in which operand 5 is the trip count.
   if (MI->getNumExplicitOperands() > 5) {
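CLC, like MVC, encodes its length in eight bits and so covers at most 256 bytes per instruction; constant lengths above that are split into a chain of blocks in which every CLC but the last branches to the EndMBB created above as soon as a difference is found. A sketch of the chunk arithmetic under that 256-byte assumption (chunksFor and lastChunkBytes are hypothetical names, not helpers in this file):

#include <cassert>
#include <cstdint>

static uint64_t chunksFor(uint64_t Length) {
  return (Length + 255) / 256; // number of CLC/MVC blocks emitted
}

static uint64_t lastChunkBytes(uint64_t Length) {
  uint64_t Rem = Length % 256;
  return Rem ? Rem : (Length ? 256 : 0);
}

int main() {
  assert(chunksFor(256) == 1);  // a single CLC, no EndMBB needed
  assert(chunksFor(257) == 2);  // the first CLC branches to EndMBB
  assert(chunksFor(600) == 3 && lastChunkBytes(600) == 88);
  return 0;
}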
@@ -3268,8 +3588,9 @@
 MachineBasicBlock *
 SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const {
-  const SystemZInstrInfo *TII = TM.getInstrInfo();
   MachineFunction &MF = *MBB->getParent();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   MachineRegisterInfo &MRI = MF.getRegInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -3323,6 +3644,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
   return DoneMBB;
 }
 
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+                                            MachineBasicBlock *MBB,
+                                            unsigned Opcode,
+                                            bool NoFloat) const {
+  MachineFunction &MF = *MBB->getParent();
+  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+  // Update opcode.
+  MI->setDesc(TII->get(Opcode));
+
+  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+  // Make sure to add the corresponding GRSM bits if they are missing.
+  uint64_t Control = MI->getOperand(2).getImm();
+  static const unsigned GPRControlBit[16] = {
+    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+  };
+  Control |= GPRControlBit[15];
+  if (TFI->hasFP(MF))
+    Control |= GPRControlBit[11];
+  MI->getOperand(2).setImm(Control);
+
+  // Add GPR clobbers.
+  for (int I = 0; I < 16; I++) {
+    if ((Control & GPRControlBit[I]) == 0) {
+      unsigned Reg = SystemZMC::GR64Regs[I];
+      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+    }
+  }
+
+  // Add FPR clobbers.
+  if (!NoFloat && (Control & 4) != 0) {
+    for (int I = 0; I < 16; I++) {
+      unsigned Reg = SystemZMC::FP64Regs[I];
+      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+    }
+  }
+
+  return MBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::
 EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
   switch (MI->getOpcode()) {
@@ -3564,6 +3929,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
     return emitStringWrapper(MI, MBB, SystemZ::MVST);
   case SystemZ::SRSTLoop:
     return emitStringWrapper(MI, MBB, SystemZ::SRST);
+  case SystemZ::TBEGIN:
+    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+  case SystemZ::TBEGIN_nofloat:
+    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+  case SystemZ::TBEGINC:
+    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
   default:
     llvm_unreachable("Unexpected instr type to insert");
   }
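In emitTransactionBegin, the general-register save mask (GRSM) in the TBEGIN control field carries one bit per even/odd register pair, which is why consecutive GPRControlBit entries repeat: GPRControlBit[I] is 0x8000 >> (I / 2). A standalone model of the resulting clobber decision (clobberedGPRs is a hypothetical name, and plain indices stand in for SystemZMC::GR64Regs):

#include <cassert>
#include <cstdint>
#include <vector>

// A pair whose GRSM bit is clear is not restored when the transaction
// aborts, so those registers must be modelled as clobbered. The bit
// for r14/r15 is forced on to keep the return address and stack
// pointer intact; r10/r11 is added when a frame pointer is in use.
static std::vector<int> clobberedGPRs(uint16_t Control, bool HasFP) {
  Control |= 0x0100;   // GPRControlBit[15]: the r14/r15 pair
  if (HasFP)
    Control |= 0x0400; // GPRControlBit[11]: the r10/r11 pair
  std::vector<int> Clobbers;
  for (int I = 0; I < 16; ++I)
    if ((Control & (0x8000 >> (I / 2))) == 0)
      Clobbers.push_back(I); // index into SystemZMC::GR64Regs
  return Clobbers;
}

int main() {
  // An empty user mask without a frame pointer clobbers everything
  // except the forced r14/r15 pair.
  assert(clobberedGPRs(0, false).size() == 14);
  return 0;
}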