diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index d836c29d6bd..9465420ba90 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -19,24 +19,21 @@
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
@@ -144,10 +141,6 @@
   /// SelectionDAG operations.
   ///
   class X86DAGToDAGISel : public SelectionDAGISel {
-    /// X86Lowering - This object fully describes how to lower LLVM code to an
-    /// X86-specific SelectionDAG.
-    const X86TargetLowering &X86Lowering;
-
     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
     /// make the right decision when generating code for different targets.
     const X86Subtarget *Subtarget;
@@ -159,7 +152,6 @@
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
       : SelectionDAGISel(tm, OptLevel),
-        X86Lowering(*tm.getTargetLowering()),
         Subtarget(&tm.getSubtarget<X86Subtarget>()),
         OptForSize(false) {}
 
@@ -191,7 +183,6 @@
     SDNode *Select(SDNode *N);
     SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
-    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
     SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
 
     bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
@@ -204,9 +195,13 @@
     bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
+    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
     bool SelectLEAAddr(SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp,
                        SDValue &Segment);
+    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
+                            SDValue &Scale, SDValue &Index, SDValue &Disp,
+                            SDValue &Segment);
     bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
@@ -233,14 +228,15 @@
                                    SDValue &Scale, SDValue &Index,
                                    SDValue &Disp, SDValue &Segment) {
       Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
-        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
+                                    getTargetLowering()->getPointerTy()) :
         AM.Base_Reg;
       Scale = getI8Imm(AM.Scale);
       Index = AM.IndexReg;
       // These are 32-bit even in 64-bit mode since RIP relative offset
       // is 32-bit.
       if (AM.GV)
-        Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                               MVT::i32, AM.Disp,
                                               AM.SymbolFlags);
       else if (AM.CP)
@@ -284,13 +280,13 @@
 
     /// getTargetMachine - Return a reference to the TargetMachine, casted
     /// to the target-specific type.
-    const X86TargetMachine &getTargetMachine() {
+    const X86TargetMachine &getTargetMachine() const {
       return static_cast<const X86TargetMachine &>(TM);
     }
 
     /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
     /// to the target-specific type.
-    const X86InstrInfo *getInstrInfo() {
+    const X86InstrInfo *getInstrInfo() const {
       return getTargetMachine().getInstrInfo();
     }
   };
@@ -363,7 +359,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
 
 /// MoveBelowOrigChain - Replace the original chain operand of the call with
 /// load's chain operand and move load below the call's chain operand.
 static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
-                                  SDValue Call, SDValue OrigChain) {
+                               SDValue Call, SDValue OrigChain) {
   SmallVector<SDValue, 8> Ops;
   SDValue Chain = OrigChain.getOperand(0);
   if (Chain.getNode() == Load.getNode())
@@ -377,7 +373,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
     else
       Ops.push_back(Chain.getOperand(i));
   SDValue NewChain =
-    CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+    CurDAG->getNode(ISD::TokenFactor, SDLoc(Load),
                     MVT::Other, &Ops[0], Ops.size());
   Ops.clear();
   Ops.push_back(NewChain);
@@ -387,11 +383,13 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
   CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
   CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                              Load.getOperand(1), Load.getOperand(2));
+
+  unsigned NumOps = Call.getNode()->getNumOperands();
   Ops.clear();
   Ops.push_back(SDValue(Load.getNode(), 1));
-  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+  for (unsigned i = 1, e = NumOps; i != e; ++i)
     Ops.push_back(Call.getOperand(i));
-  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
+  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
 }
 
 /// isCalleeLoad - Return true if call address is a load and it can be
@@ -400,6 +398,10 @@
 /// In the case of a tail call, there isn't a callseq node between the call
 /// chain and the load.
 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+  // The transformation is somewhat dangerous if the call's chain was glued to
+  // the call. After MoveBelowOrigChain the load is moved between the call and
+  // the chain; this can create a cycle if the load is not folded. So it is
+  // *really* important that we are sure the load will be folded.
   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
     return false;
   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@@ -418,6 +420,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
   if (!Chain.getNumOperands())
     return false;
+  // Since we are not checking for AA here, conservatively abort if the chain
+  // writes to memory. It's not safe to move the callee (a load) across a store.
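+  // Without alias analysis we cannot tell whether such a store would clobber
+  // the memory the callee load reads from, so assume that it might.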
+  if (isa<MemSDNode>(Chain.getNode()) &&
+      cast<MemSDNode>(Chain.getNode())->writeMem())
+    return false;
   if (Chain.getOperand(0).getNode() == Callee.getNode())
     return true;
   if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
@@ -429,15 +436,21 @@
 
 void X86DAGToDAGISel::PreprocessISelDAG() {
   // OptForSize is used in pattern predicates that isel is matching.
-  OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  OptForSize = MF->getFunction()->getAttributes().
+    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
 
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ) {
     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 
     if (OptLevel != CodeGenOpt::None &&
-        (N->getOpcode() == X86ISD::CALL ||
-         N->getOpcode() == X86ISD::TC_RETURN)) {
+        // Only do this when the target doesn't favor register indirect
+        // calls.
+        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
+         (N->getOpcode() == X86ISD::TC_RETURN &&
+          // Only do this if the load can be folded into TC_RETURN.
+          (Subtarget->is64Bit() ||
+           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
       /// Also try moving call address load from outside callseq_start to just
       /// before the call to allow it to be folded.
       ///
@@ -487,8 +500,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
 
     // If the source and destination are SSE registers, then this is a legal
     // conversion that should not be lowered.
-    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
-    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+    const X86TargetLowering *X86Lowering =
+        static_cast<const X86TargetLowering *>(getTargetLowering());
+    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
     if (SrcIsSSE && DstIsSSE)
       continue;
 
@@ -511,7 +526,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
     MemVT = SrcIsSSE ? SrcVT : DstVT;
 
     SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
-    DebugLoc dl = N->getDebugLoc();
+    SDLoc dl(N);
 
     // FIXME: optimize the case where the src/dest is a load or store?
     SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
@@ -769,7 +784,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
     return true;
 
   EVT VT = N.getValueType();
-  DebugLoc DL = N.getDebugLoc();
+  SDLoc DL(N);
   SDValue Eight = DAG.getConstant(8, MVT::i8);
   SDValue NewMask = DAG.getConstant(0xff, VT);
   SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
@@ -817,7 +832,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
     return true;
 
   EVT VT = N.getValueType();
-  DebugLoc DL = N.getDebugLoc();
+  SDLoc DL(N);
   SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
   SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
@@ -873,8 +888,8 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
     return true;
 
   unsigned ShiftAmt = Shift.getConstantOperandVal(1);
-  unsigned MaskLZ = CountLeadingZeros_64(Mask);
-  unsigned MaskTZ = CountTrailingZeros_64(Mask);
+  unsigned MaskLZ = countLeadingZeros(Mask);
+  unsigned MaskTZ = countTrailingZeros(Mask);
 
   // The amount of shift we're trying to fit into the addressing mode is taken
   // from the trailing zeros of the mask.
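   // For example, Mask == 0x18 (two set bits above three trailing zeros) gives
   // a shift of 3, i.e. a scale of 8 in the addressing mode, provided the
   // masked-off high bits of X are already known to be zero.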
@@ -919,11 +934,11 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
   if (ReplacingAnyExtend) {
     assert(X.getValueType() != VT);
     // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
-    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X);
+    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
     InsertDAGNode(DAG, N, NewX);
     X = NewX;
   }
-  DebugLoc DL = N.getDebugLoc();
+  SDLoc DL(N);
   SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
   SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
   SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
@@ -947,7 +962,7 @@
 
 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                               unsigned Depth) {
-  DebugLoc dl = N.getDebugLoc();
+  SDLoc dl(N);
   DEBUG({
       dbgs() << "MatchAddress: ";
       AM.dump();
@@ -1031,8 +1046,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       AM.IndexReg = ShVal;
       return false;
     }
-    break;
   }
+  break;
 
   case ISD::SRL: {
     // Scale must not be used already.
@@ -1293,7 +1308,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       // that are not a MemSDNode, and thus don't have proper addrspace info.
       Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
       Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
-      Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
     unsigned AddrSpace =
       cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
     // AddrSpace 256 -> GS, 257 -> FS.
@@ -1365,6 +1382,71 @@
   return false;
 }
 
+bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
+  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+    uint64_t ImmVal = CN->getZExtValue();
+    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
+      return false;
+
+    Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
+    return true;
+  }
+
+  // In static codegen with small code model, we can get the address of a label
+  // into a register with 'movl'. TableGen has already made sure we're looking
+  // at a label of some kind.
+  assert(N->getOpcode() == X86ISD::Wrapper &&
+         "Unexpected node type for MOV32ri64");
+  N = N.getOperand(0);
+
+  if (N->getOpcode() != ISD::TargetConstantPool &&
+      N->getOpcode() != ISD::TargetJumpTable &&
+      N->getOpcode() != ISD::TargetGlobalAddress &&
+      N->getOpcode() != ISD::TargetExternalSymbol &&
+      N->getOpcode() != ISD::TargetBlockAddress)
+    return false;
+
+  Imm = N;
+  return TM.getCodeModel() == CodeModel::Small;
+}
+
+bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
+                                         SDValue &Scale, SDValue &Index,
+                                         SDValue &Disp, SDValue &Segment) {
+  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
+    return false;
+
+  SDLoc DL(N);
+  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
+  if (RN && RN->getReg() == 0)
+    Base = CurDAG->getRegister(0, MVT::i64);
+  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
+    // Base could already be %rip, particularly in the x32 ABI.
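+    // Widen the 32-bit base for use in a 64-bit LEA; the zero immediate
+    // operand of SUBREG_TO_REG asserts that the upper 32 bits are zero.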
+    Base = SDValue(CurDAG->getMachineNode(
+                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
+                       CurDAG->getTargetConstant(0, MVT::i64),
+                       Base,
+                       CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+                   0);
+  }
+
+  RN = dyn_cast<RegisterSDNode>(Index);
+  if (RN && RN->getReg() == 0)
+    Index = CurDAG->getRegister(0, MVT::i64);
+  else {
+    assert(Index.getValueType() == MVT::i32 &&
+           "Expect to be extending 32-bit registers for use in LEA");
+    Index = SDValue(CurDAG->getMachineNode(
+                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
+                        CurDAG->getTargetConstant(0, MVT::i64),
+                        Index,
+                        CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+                    0);
+  }
+
+  return true;
+}
+
 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
 bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
@@ -1472,7 +1554,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
 ///
 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
   unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
-  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+  return CurDAG->getRegister(GlobalBaseReg,
+                             getTargetLowering()->getPointerTy()).getNode();
 }
 
 SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
@@ -1480,172 +1563,26 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   SDValue Chain = Node->getOperand(0);
   SDValue In1 = Node->getOperand(1);
   SDValue In2L = Node->getOperand(2);
   SDValue In2H = Node->getOperand(3);
+
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
   if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
     return NULL;
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
-  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
-                                           MVT::i32, MVT::i32, MVT::Other, Ops,
-                                           array_lengthof(Ops));
+  SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
+                                           MVT::i32, MVT::i32, MVT::Other, Ops);
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
   return ResNode;
 }
 
-// FIXME: Figure out some way to unify this with the 'or' and other code
-// below.
-SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
-  if (Node->hasAnyUseOfValue(0))
-    return 0;
-
-  // Optimize common patterns for __sync_add_and_fetch and
-  // __sync_sub_and_fetch where the result is not used. This allows us
-  // to use "lock" version of add, sub, inc, dec instructions.
-  // FIXME: Do not use special instructions but instead add the "lock"
-  // prefix to the target node somehow. The extra information will then be
-  // transferred to machine instruction and it denotes the prefix.
-  SDValue Chain = Node->getOperand(0);
-  SDValue Ptr = Node->getOperand(1);
-  SDValue Val = Node->getOperand(2);
-  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
-    return 0;
-
-  bool isInc = false, isDec = false, isSub = false, isCN = false;
-  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
-  if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
-    isCN = true;
-    int64_t CNVal = CN->getSExtValue();
-    if (CNVal == 1)
-      isInc = true;
-    else if (CNVal == -1)
-      isDec = true;
-    else if (CNVal >= 0)
-      Val = CurDAG->getTargetConstant(CNVal, NVT);
-    else {
-      isSub = true;
-      Val = CurDAG->getTargetConstant(-CNVal, NVT);
-    }
-  } else if (Val.hasOneUse() &&
-             Val.getOpcode() == ISD::SUB &&
-             X86::isZeroNode(Val.getOperand(0))) {
-    isSub = true;
-    Val = Val.getOperand(1);
-  }
-
-  DebugLoc dl = Node->getDebugLoc();
-  unsigned Opc = 0;
-  switch (NVT.getSimpleVT().SimpleTy) {
-  default: return 0;
-  case MVT::i8:
-    if (isInc)
-      Opc = X86::LOCK_INC8m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC8m;
-    else if (isSub) {
-      if (isCN)
-        Opc = X86::LOCK_SUB8mi;
-      else
-        Opc = X86::LOCK_SUB8mr;
-    } else {
-      if (isCN)
-        Opc = X86::LOCK_ADD8mi;
-      else
-        Opc = X86::LOCK_ADD8mr;
-    }
-    break;
-  case MVT::i16:
-    if (isInc)
-      Opc = X86::LOCK_INC16m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC16m;
-    else if (isSub) {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_SUB16mi8;
-        else
-          Opc = X86::LOCK_SUB16mi;
-      } else
-        Opc = X86::LOCK_SUB16mr;
-    } else {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_ADD16mi8;
-        else
-          Opc = X86::LOCK_ADD16mi;
-      } else
-        Opc = X86::LOCK_ADD16mr;
-    }
-    break;
-  case MVT::i32:
-    if (isInc)
-      Opc = X86::LOCK_INC32m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC32m;
-    else if (isSub) {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_SUB32mi8;
-        else
-          Opc = X86::LOCK_SUB32mi;
-      } else
-        Opc = X86::LOCK_SUB32mr;
-    } else {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_ADD32mi8;
-        else
-          Opc = X86::LOCK_ADD32mi;
-      } else
-        Opc = X86::LOCK_ADD32mr;
-    }
-    break;
-  case MVT::i64:
-    if (isInc)
-      Opc = X86::LOCK_INC64m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC64m;
-    else if (isSub) {
-      Opc = X86::LOCK_SUB64mr;
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_SUB64mi8;
-        else if (i64immSExt32(Val.getNode()))
-          Opc = X86::LOCK_SUB64mi32;
-      }
-    } else {
-      Opc = X86::LOCK_ADD64mr;
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_ADD64mi8;
-        else if (i64immSExt32(Val.getNode()))
-          Opc = X86::LOCK_ADD64mi32;
-      }
-    }
-    break;
-  }
-
-  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                 dl, NVT), 0);
-  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
-  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
-  if (isInc || isDec) {
-    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
-    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
-    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
-    SDValue RetVals[] = { Undef, Ret };
-    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
-  } else {
-    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
-    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
-    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
-    SDValue RetVals[] = { Undef, Ret };
-    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
-  }
-}
-
+/// Atomic opcode table
+///
 enum AtomicOpc {
+  ADD,
+  SUB,
+  INC,
+  DEC,
   OR,
   AND,
   XOR,
@@ -1668,6 +1605,58 @@
 };
 
 static const uint16_t
AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+  {
+    X86::LOCK_ADD8mi,
+    X86::LOCK_ADD8mr,
+    X86::LOCK_ADD16mi8,
+    X86::LOCK_ADD16mi,
+    X86::LOCK_ADD16mr,
+    X86::LOCK_ADD32mi8,
+    X86::LOCK_ADD32mi,
+    X86::LOCK_ADD32mr,
+    X86::LOCK_ADD64mi8,
+    X86::LOCK_ADD64mi32,
+    X86::LOCK_ADD64mr,
+  },
+  {
+    X86::LOCK_SUB8mi,
+    X86::LOCK_SUB8mr,
+    X86::LOCK_SUB16mi8,
+    X86::LOCK_SUB16mi,
+    X86::LOCK_SUB16mr,
+    X86::LOCK_SUB32mi8,
+    X86::LOCK_SUB32mi,
+    X86::LOCK_SUB32mr,
+    X86::LOCK_SUB64mi8,
+    X86::LOCK_SUB64mi32,
+    X86::LOCK_SUB64mr,
+  },
+  {
+    0,
+    X86::LOCK_INC8m,
+    0,
+    0,
+    X86::LOCK_INC16m,
+    0,
+    0,
+    X86::LOCK_INC32m,
+    0,
+    0,
+    X86::LOCK_INC64m,
+  },
+  {
+    0,
+    X86::LOCK_DEC8m,
+    0,
+    0,
+    X86::LOCK_DEC16m,
+    0,
+    0,
+    X86::LOCK_DEC32m,
+    0,
+    0,
+    X86::LOCK_DEC64m,
+  },
   {
     X86::LOCK_OR8mi,
     X86::LOCK_OR8mr,
@@ -1679,7 +1668,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
     X86::LOCK_OR32mr,
     X86::LOCK_OR64mi8,
     X86::LOCK_OR64mi32,
-    X86::LOCK_OR64mr
+    X86::LOCK_OR64mr,
   },
   {
     X86::LOCK_AND8mi,
@@ -1692,7 +1681,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
     X86::LOCK_AND32mr,
     X86::LOCK_AND64mi8,
     X86::LOCK_AND64mi32,
-    X86::LOCK_AND64mr
+    X86::LOCK_AND64mr,
   },
   {
    X86::LOCK_XOR8mi,
@@ -1705,18 +1694,74 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
     X86::LOCK_XOR32mr,
     X86::LOCK_XOR64mi8,
     X86::LOCK_XOR64mi32,
-    X86::LOCK_XOR64mr
+    X86::LOCK_XOR64mr,
   }
 };
 
+// Return the target constant operand for atomic-load-op and do simple
+// translations, such as from atomic-load-add to lock-sub. The return value is
+// one of the following 3 cases:
+// + target-constant, the operand could be supported as a target constant.
+// + empty, the operand is not needed any more with the new op selected.
+// + non-empty, otherwise.
+static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
+                                                SDLoc dl,
+                                                enum AtomicOpc &Op, EVT NVT,
+                                                SDValue Val) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
+    int64_t CNVal = CN->getSExtValue();
+    // Quit if not 32-bit imm.
+    if ((int32_t)CNVal != CNVal)
+      return Val;
+    // For atomic-load-add, we could do some optimizations.
+    if (Op == ADD) {
+      // Translate to INC/DEC if ADD by 1 or -1.
+      if ((CNVal == 1) || (CNVal == -1)) {
+        Op = (CNVal == 1) ? INC : DEC;
+        // No more constant operand after being translated into INC/DEC.
+        return SDValue();
+      }
+      // Translate to SUB if ADD by negative value.
+      if (CNVal < 0) {
+        Op = SUB;
+        CNVal = -CNVal;
+      }
+    }
+    return CurDAG->getTargetConstant(CNVal, NVT);
+  }
+
+  // If the value operand is single-used, try to optimize it.
+  if (Op == ADD && Val.hasOneUse()) {
+    // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
+    if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
+      Op = SUB;
+      return Val.getOperand(1);
+    }
+    // A special case for i16, which needs truncating as, in most cases, it's
+    // promoted to i32.
+    // We will translate
+    // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)).
+    if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
+        Val.getOperand(0).getOpcode() == ISD::SUB &&
+        X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
+      Op = SUB;
+      Val = Val.getOperand(0);
+      return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
+                                            Val.getOperand(1));
+    }
+  }
+
+  return Val;
+}
+
 SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
   if (Node->hasAnyUseOfValue(0))
     return 0;
 
+  SDLoc dl(Node);
+
   // Optimize common patterns for __sync_or_and_fetch and similar arith
   // operations where the result is not used. This allows us to use the "lock"
   // version of the arithmetic instruction.
-  // FIXME: Same as for 'add' and 'sub', try to merge those down here.
   SDValue Chain = Node->getOperand(0);
   SDValue Ptr = Node->getOperand(1);
   SDValue Val = Node->getOperand(2);
@@ -1727,6 +1772,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
   // Which index into the table.
   enum AtomicOpc Op;
   switch (Node->getOpcode()) {
+    default:
+      return 0;
     case ISD::ATOMIC_LOAD_OR:
       Op = OR;
       break;
@@ -1736,16 +1783,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
     case ISD::ATOMIC_LOAD_XOR:
       Op = XOR;
       break;
-    default:
-      return 0;
+    case ISD::ATOMIC_LOAD_ADD:
+      Op = ADD;
+      break;
   }
 
-  bool isCN = false;
-  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
-  if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
-    isCN = true;
-    Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
-  }
+  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+  bool isUnOp = !Val.getNode();
+  bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
 
   unsigned Opc = 0;
   switch (NVT.getSimpleVT().SimpleTy) {
@@ -1787,13 +1832,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
 
   assert(Opc != 0 && "Invalid arith lock transform!");
 
-  DebugLoc dl = Node->getDebugLoc();
+  SDValue Ret;
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
-  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
-  SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+  if (isUnOp) {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
+  } else {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
+  }
   cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
   SDValue RetVals[] = { Undef, Ret };
   return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
@@ -1938,7 +1988,7 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
     if (ChainCheck)
       // Make a new TokenFactor with all the other input chains except
       // for the load.
- InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(), + InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, &ChainOps[0], ChainOps.size()); } if (!ChainCheck) @@ -1986,8 +2036,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -2001,7 +2050,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); @@ -2032,6 +2081,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_avx2_gather_d_d_256: case Intrinsic::x86_avx2_gather_q_d: case Intrinsic::x86_avx2_gather_q_d_256: { + if (!Subtarget->hasAVX2()) + break; unsigned Opc; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); @@ -2071,6 +2122,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: case X86ISD::ATOMNAND64_DAG: case X86ISD::ATOMAND64_DAG: + case X86ISD::ATOMMAX64_DAG: + case X86ISD::ATOMMIN64_DAG: + case X86ISD::ATOMUMAX64_DAG: + case X86ISD::ATOMUMIN64_DAG: case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { @@ -2081,6 +2136,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; + case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; + case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; + case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; } SDNode *RetVal = SelectAtomic64(Node, Opc); @@ -2089,15 +2148,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; } - case ISD::ATOMIC_LOAD_ADD: { - SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); - if (RetVal) - return RetVal; - break; - } case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: { + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); if (RetVal) return RetVal; @@ -2198,7 +2252,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); @@ -2212,13 +2266,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - 
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; } } else { switch (NVT.getSimpleVT().SimpleTy) { @@ -2230,13 +2287,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2248,22 +2323,43 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2288,19 +2384,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. 
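+    // For the MULX forms both halves were already produced as explicit
+    // results (ResLo/ResHi); the CopyFromReg below runs only for MUL/IMUL,
+    // whose results live in the fixed LoReg/HiReg pair.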
if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResLo.getNode() == 0) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResHi.getNode() == 0) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } return NULL; @@ -2331,27 +2433,24 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } unsigned LoReg, HiReg, ClrReg; - unsigned ClrOpcode, SExtOpcode; + unsigned SExtOpcode; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; - ClrOpcode = 0; SExtOpcode = X86::CBW; break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; + ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; - ClrOpcode = X86::MOV32r0; SExtOpcode = X86::CDQ; break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } @@ -2369,8 +2468,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2390,8 +2488,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. 
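+      // A single MOV32r0 now serves every width: a 32-bit zero write clears
+      // the upper 32 bits, so i16 and i64 are formed with subregister copies.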
- SDValue ClrNode = - SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); + SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); + switch (NVT.getSimpleVT().SimpleTy) { + case MVT::i16: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, + CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)), + 0); + break; + case MVT::i32: + break; + case MVT::i64: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), ClrNode, + CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), + 0); + break; + default: + llvm_unreachable("Unexpected division source"); + } + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -2401,8 +2520,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2413,6 +2531,11 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Prevent use of AH in a REX instruction by referencing AX instead. // Shift it down 8 bits. + // + // The current assumption of the register allocator is that isel + // won't generate explicit references to the GPR8_NOREX registers. If + // the allocator and/or the backend get enhanced to be more robust in + // that regard, this can be, and should be, removed. if (HiReg == X86::AH && Subtarget->is64Bit() && !SDValue(Node, 1).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -2501,7 +2624,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2532,8 +2661,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2549,7 +2683,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. - return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 
                   1 : 0)),
+                  SDValue(NewNode, 0));
+      return NULL;
     }
 
     // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
@@ -2565,7 +2705,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
                                                      MVT::i32, Reg);
 
       // Emit a testl.
-      return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+      SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+                                               Subreg, Imm);
+      // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+      // one, do not call ReplaceAllUsesWith.
+      ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+                  SDValue(NewNode, 0));
+      return NULL;
     }
   }
   break;
@@ -2610,9 +2756,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
     EVT LdVT = LoadNode->getMemoryVT();
     unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
     MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
-                                                   Node->getDebugLoc(),
-                                                   MVT::i32, MVT::Other, Ops,
-                                                   array_lengthof(Ops));
+                                                   SDLoc(Node),
+                                                   MVT::i32, MVT::Other, Ops);
     Result->setMemRefs(MemOp, MemOp + 2);
 
     ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
     ReplaceUses(SDValue(Node, 0), SDValue(Result, 0));
 
     return Result;
   }
 
-
-  // FIXME: Custom handling because TableGen doesn't support multiple implicit
-  // defs in an instruction pattern
-  case X86ISD::PCMPESTRI: {
-    SDValue N0 = Node->getOperand(0);
-    SDValue N1 = Node->getOperand(1);
-    SDValue N2 = Node->getOperand(2);
-    SDValue N3 = Node->getOperand(3);
-    SDValue N4 = Node->getOperand(4);
-
-    // Make sure last argument is a constant
-    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
-    if (!Cst)
-      break;
-
-    uint64_t Imm = Cst->getZExtValue();
-
-    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
-                                          X86::EAX, N1, SDValue()).getValue(1);
-    InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
-                                  N3, InFlag).getValue(1);
-
-    SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
-    unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
-                                         X86::PCMPESTRIrr;
-    InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
-                                            array_lengthof(Ops)), 0);
-
-    if (!SDValue(Node, 0).use_empty()) {
-      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                              X86::ECX, NVT, InFlag);
-      InFlag = Result.getValue(2);
-      ReplaceUses(SDValue(Node, 0), Result);
-    }
-    if (!SDValue(Node, 1).use_empty()) {
-      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
-                                              X86::EFLAGS, NVT, InFlag);
-      InFlag = Result.getValue(2);
-      ReplaceUses(SDValue(Node, 1), Result);
-    }
-
-    return NULL;
-  }
-
-  // FIXME: Custom handling because TableGen doesn't support multiple implicit
-  // defs in an instruction pattern
-  case X86ISD::PCMPISTRI: {
-    SDValue N0 = Node->getOperand(0);
-    SDValue N1 = Node->getOperand(1);
-    SDValue N2 = Node->getOperand(2);
-
-    // Make sure last argument is a constant
-    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
-    if (!Cst)
-      break;
-
-    uint64_t Imm = Cst->getZExtValue();
-
-    SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
-    unsigned Opc = Subtarget->hasAVX() ?
X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node);
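
A minimal, self-contained sketch of the table lookup that the new AtomicOpcTbl enables. The opcode numbers below are hypothetical stand-ins for the X86::LOCK_* values, and the AtomicSz column names are an assumption (that enum is unchanged by the patch and therefore elided from the diff). The point is only the shape: the operation selects the row, the operand width/form selects the column, a zero entry marks a combination with no instruction, and a single lookup replaces the nested switch that SelectAtomicLoadAdd used to carry.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Rows: same order as the AtomicOpc enum in the patch.
  enum AtomicOpc { ADD, SUB, INC, DEC, OR, AND, XOR, AtomicOpcEnd };
  // Columns: assumed layout of the (elided) AtomicSz enum.
  enum AtomicSz { ConstantI8, I8, SextConstantI16, ConstantI16, I16,
                  SextConstantI32, ConstantI32, I32,
                  SextConstantI64, ConstantI64, I64, AtomicSzEnd };

  // Hypothetical stand-ins for the X86::LOCK_* opcode values; 0 marks a
  // combination with no instruction (INC/DEC take no explicit operand).
  static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
    /* ADD */ { 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111 },
    /* SUB */ { 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211 },
    /* INC */ {   0, 301,   0,   0, 302,   0,   0, 303,   0,   0, 304 },
    /* DEC */ {   0, 401,   0,   0, 402,   0,   0, 403,   0,   0, 404 },
    /* OR  */ { 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511 },
    /* AND */ { 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611 },
    /* XOR */ { 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711 },
  };

  int main() {
    // A locked 32-bit register OR: one table lookup selects the opcode.
    uint16_t Opc = AtomicOpcTbl[OR][I32];
    assert(Opc != 0 && "Invalid arith lock transform!");
    printf("selected opcode %u\n", (unsigned)Opc);
    return 0;
  }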