X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelDAGToDAG.cpp;h=968b3583c36d7176e749d36d57e141d3797022fe;hb=299eb2e6e15f0df73ce6202849fd2b4f0713cc1f;hp=88cfe10dfa9c075e9e569561ce229b614c303899;hpb=aa0cfea9a439cb8de1ff288bd6db9b4a86c4da7b;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 88cfe10dfa9..968b3583c36 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -19,24 +19,21 @@ #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); @@ -191,7 +188,6 @@ namespace { SDNode *Select(SDNode *N); SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); - SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); @@ -204,9 +200,6 @@ namespace { bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectSingleRegAddr(SDNode *Parent, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp, - SDValue &Segment); bool SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -287,13 +280,13 @@ namespace { /// getTargetMachine - Return a reference to the TargetMachine, casted /// to the target-specific type. - const X86TargetMachine &getTargetMachine() { + const X86TargetMachine &getTargetMachine() const { return static_cast(TM); } /// getInstrInfo - Return a reference to the TargetInstrInfo, casted /// to the target-specific type. - const X86InstrInfo *getInstrInfo() { + const X86InstrInfo *getInstrInfo() const { return getTargetMachine().getInstrInfo(); } }; @@ -366,7 +359,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { /// MoveBelowCallOrigChain - Replace the original chain operand of the call with /// load's chain operand and move load below the call's chain operand. static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, - SDValue Call, SDValue OrigChain) { + SDValue Call, SDValue OrigChain) { SmallVector Ops; SDValue Chain = OrigChain.getOperand(0); if (Chain.getNode() == Load.getNode()) @@ -390,11 +383,13 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); + + unsigned NumOps = Call.getNode()->getNumOperands(); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); - for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) + for (unsigned i = 1, e = NumOps; i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -403,6 +398,10 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, /// In the case of a tail call, there isn't a callseq node between the call /// chain and the load. static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { + // The transformation is somewhat dangerous if the call's chain was glued to + // the call. After MoveBelowOrigChain the load is moved between the call and + // the chain, this can create a cycle if the load is not folded. So it is + // *really* important that we are sure the load will be folded. if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) return false; LoadSDNode *LD = dyn_cast(Callee.getNode()); @@ -421,6 +420,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { if (!Chain.getNumOperands()) return false; + // Since we are not checking for AA here, conservatively abort if the chain + // writes to memory. It's not safe to move the callee (a load) across a store. + if (isa(Chain.getNode()) && + cast(Chain.getNode())->writeMem()) + return false; if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && @@ -432,15 +436,21 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - (N->getOpcode() == X86ISD::CALL || - N->getOpcode() == X86ISD::TC_RETURN)) { + // Only does this when target favors doesn't favor register indirect + // call. + ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || + (N->getOpcode() == X86ISD::TC_RETURN && + // Only does this if load can be folded into TC_RETURN. + (Subtarget->is64Bit() || + getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -1034,8 +1044,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal; return false; } - break; } + break; case ISD::SRL: { // Scale must not be used already. @@ -1296,7 +1306,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, // that are not a MemSDNode, and thus don't have proper addrspace info. Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores - Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme + Parent->getOpcode() != X86ISD::TLSCALL && // Fixme + Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp + Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = cast(Parent)->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS. @@ -1322,31 +1334,6 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } -/// SelectSingleRegAddr - Like SelectAddr, but reject any address that would -/// require more than one allocatable register. -/// -/// This is used for a TCRETURNmi64 instruction when used to tail call a -/// variadic function with 6 arguments: Only %r11 is available from GR64_TC. -/// The other scratch register, %rax, is needed to pass in the number of vector -/// registers used in the variadic arguments. -/// -bool X86DAGToDAGISel::SelectSingleRegAddr(SDNode *Parent, SDValue N, - SDValue &Base, - SDValue &Scale, SDValue &Index, - SDValue &Disp, SDValue &Segment) { - if (!SelectAddr(Parent, N, Base, Scale, Index, Disp, Segment)) - return false; - // Anything %RIP relative is fine. - if (RegisterSDNode *Reg = dyn_cast(Base)) - if (Reg->getReg() == X86::RIP) - return true; - // Check that the index register is 0. - if (RegisterSDNode *Reg = dyn_cast(Index)) - if (Reg->getReg() == 0) - return true; - return false; -} - /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. @@ -1508,6 +1495,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In1 = Node->getOperand(1); SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; @@ -1515,165 +1503,18 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast(Node)->getMemOperand(); const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::i32, MVT::Other, Ops); cast(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } -// FIXME: Figure out some way to unify this with the 'or' and other code -// below. -SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { - if (Node->hasAnyUseOfValue(0)) - return 0; - - // Optimize common patterns for __sync_add_and_fetch and - // __sync_sub_and_fetch where the result is not used. This allows us - // to use "lock" version of add, sub, inc, dec instructions. - // FIXME: Do not use special instructions but instead add the "lock" - // prefix to the target node somehow. The extra information will then be - // transferred to machine instruction and it denotes the prefix. - SDValue Chain = Node->getOperand(0); - SDValue Ptr = Node->getOperand(1); - SDValue Val = Node->getOperand(2); - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; - - bool isInc = false, isDec = false, isSub = false, isCN = false; - ConstantSDNode *CN = dyn_cast(Val); - if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) { - isCN = true; - int64_t CNVal = CN->getSExtValue(); - if (CNVal == 1) - isInc = true; - else if (CNVal == -1) - isDec = true; - else if (CNVal >= 0) - Val = CurDAG->getTargetConstant(CNVal, NVT); - else { - isSub = true; - Val = CurDAG->getTargetConstant(-CNVal, NVT); - } - } else if (Val.hasOneUse() && - Val.getOpcode() == ISD::SUB && - X86::isZeroNode(Val.getOperand(0))) { - isSub = true; - Val = Val.getOperand(1); - } - - DebugLoc dl = Node->getDebugLoc(); - unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { - default: return 0; - case MVT::i8: - if (isInc) - Opc = X86::LOCK_INC8m; - else if (isDec) - Opc = X86::LOCK_DEC8m; - else if (isSub) { - if (isCN) - Opc = X86::LOCK_SUB8mi; - else - Opc = X86::LOCK_SUB8mr; - } else { - if (isCN) - Opc = X86::LOCK_ADD8mi; - else - Opc = X86::LOCK_ADD8mr; - } - break; - case MVT::i16: - if (isInc) - Opc = X86::LOCK_INC16m; - else if (isDec) - Opc = X86::LOCK_DEC16m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB16mi8; - else - Opc = X86::LOCK_SUB16mi; - } else - Opc = X86::LOCK_SUB16mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD16mi8; - else - Opc = X86::LOCK_ADD16mi; - } else - Opc = X86::LOCK_ADD16mr; - } - break; - case MVT::i32: - if (isInc) - Opc = X86::LOCK_INC32m; - else if (isDec) - Opc = X86::LOCK_DEC32m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB32mi8; - else - Opc = X86::LOCK_SUB32mi; - } else - Opc = X86::LOCK_SUB32mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD32mi8; - else - Opc = X86::LOCK_ADD32mi; - } else - Opc = X86::LOCK_ADD32mr; - } - break; - case MVT::i64: - if (isInc) - Opc = X86::LOCK_INC64m; - else if (isDec) - Opc = X86::LOCK_DEC64m; - else if (isSub) { - Opc = X86::LOCK_SUB64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_SUB64mi32; - } - } else { - Opc = X86::LOCK_ADD64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_ADD64mi32; - } - } - break; - } - - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(Node)->getMemOperand(); - if (isInc || isDec) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); - cast(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } else { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); - cast(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } -} - +/// Atomic opcode table +/// enum AtomicOpc { + ADD, + SUB, + INC, + DEC, OR, AND, XOR, @@ -1696,6 +1537,58 @@ enum AtomicSz { }; static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { + { + X86::LOCK_ADD8mi, + X86::LOCK_ADD8mr, + X86::LOCK_ADD16mi8, + X86::LOCK_ADD16mi, + X86::LOCK_ADD16mr, + X86::LOCK_ADD32mi8, + X86::LOCK_ADD32mi, + X86::LOCK_ADD32mr, + X86::LOCK_ADD64mi8, + X86::LOCK_ADD64mi32, + X86::LOCK_ADD64mr, + }, + { + X86::LOCK_SUB8mi, + X86::LOCK_SUB8mr, + X86::LOCK_SUB16mi8, + X86::LOCK_SUB16mi, + X86::LOCK_SUB16mr, + X86::LOCK_SUB32mi8, + X86::LOCK_SUB32mi, + X86::LOCK_SUB32mr, + X86::LOCK_SUB64mi8, + X86::LOCK_SUB64mi32, + X86::LOCK_SUB64mr, + }, + { + 0, + X86::LOCK_INC8m, + 0, + 0, + X86::LOCK_INC16m, + 0, + 0, + X86::LOCK_INC32m, + 0, + 0, + X86::LOCK_INC64m, + }, + { + 0, + X86::LOCK_DEC8m, + 0, + 0, + X86::LOCK_DEC16m, + 0, + 0, + X86::LOCK_DEC32m, + 0, + 0, + X86::LOCK_DEC64m, + }, { X86::LOCK_OR8mi, X86::LOCK_OR8mr, @@ -1707,7 +1600,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_OR32mr, X86::LOCK_OR64mi8, X86::LOCK_OR64mi32, - X86::LOCK_OR64mr + X86::LOCK_OR64mr, }, { X86::LOCK_AND8mi, @@ -1720,7 +1613,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_AND32mr, X86::LOCK_AND64mi8, X86::LOCK_AND64mi32, - X86::LOCK_AND64mr + X86::LOCK_AND64mr, }, { X86::LOCK_XOR8mi, @@ -1733,18 +1626,74 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_XOR32mr, X86::LOCK_XOR64mi8, X86::LOCK_XOR64mi32, - X86::LOCK_XOR64mr + X86::LOCK_XOR64mr, } }; +// Return the target constant operand for atomic-load-op and do simple +// translations, such as from atomic-load-add to lock-sub. The return value is +// one of the following 3 cases: +// + target-constant, the operand could be supported as a target constant. +// + empty, the operand is not needed any more with the new op selected. +// + non-empty, otherwise. +static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, + DebugLoc dl, + enum AtomicOpc &Op, EVT NVT, + SDValue Val) { + if (ConstantSDNode *CN = dyn_cast(Val)) { + int64_t CNVal = CN->getSExtValue(); + // Quit if not 32-bit imm. + if ((int32_t)CNVal != CNVal) + return Val; + // For atomic-load-add, we could do some optimizations. + if (Op == ADD) { + // Translate to INC/DEC if ADD by 1 or -1. + if ((CNVal == 1) || (CNVal == -1)) { + Op = (CNVal == 1) ? INC : DEC; + // No more constant operand after being translated into INC/DEC. + return SDValue(); + } + // Translate to SUB if ADD by negative value. + if (CNVal < 0) { + Op = SUB; + CNVal = -CNVal; + } + } + return CurDAG->getTargetConstant(CNVal, NVT); + } + + // If the value operand is single-used, try to optimize it. + if (Op == ADD && Val.hasOneUse()) { + // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). + if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { + Op = SUB; + return Val.getOperand(1); + } + // A special case for i16, which needs truncating as, in most cases, it's + // promoted to i32. We will translate + // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) + if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && + Val.getOperand(0).getOpcode() == ISD::SUB && + X86::isZeroNode(Val.getOperand(0).getOperand(0))) { + Op = SUB; + Val = Val.getOperand(0); + return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, + Val.getOperand(1)); + } + } + + return Val; +} + SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; + DebugLoc dl = Node->getDebugLoc(); + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. - // FIXME: Same as for 'add' and 'sub', try to merge those down here. SDValue Chain = Node->getOperand(0); SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); @@ -1755,6 +1704,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { // Which index into the table. enum AtomicOpc Op; switch (Node->getOpcode()) { + default: + return 0; case ISD::ATOMIC_LOAD_OR: Op = OR; break; @@ -1764,16 +1715,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { case ISD::ATOMIC_LOAD_XOR: Op = XOR; break; - default: - return 0; + case ISD::ATOMIC_LOAD_ADD: + Op = ADD; + break; } - bool isCN = false; - ConstantSDNode *CN = dyn_cast(Val); - if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { - isCN = true; - Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); - } + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); + bool isUnOp = !Val.getNode(); + bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { @@ -1815,13 +1764,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { assert(Opc != 0 && "Invalid arith lock transform!"); - DebugLoc dl = Node->getDebugLoc(); + SDValue Ret; SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); + if (isUnOp) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); + } else { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); + } cast(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); @@ -2014,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -2099,6 +2052,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: case X86ISD::ATOMNAND64_DAG: case X86ISD::ATOMAND64_DAG: + case X86ISD::ATOMMAX64_DAG: + case X86ISD::ATOMMIN64_DAG: + case X86ISD::ATOMUMAX64_DAG: + case X86ISD::ATOMUMIN64_DAG: case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { @@ -2109,6 +2066,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; + case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; + case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; + case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; } SDNode *RetVal = SelectAtomic64(Node, Opc); @@ -2117,15 +2078,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; } - case ISD::ATOMIC_LOAD_ADD: { - SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); - if (RetVal) - return RetVal; - break; - } case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: { + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); if (RetVal) return RetVal; @@ -2226,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); @@ -2240,13 +2196,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; } } else { switch (NVT.getSimpleVT().SimpleTy) { @@ -2258,13 +2217,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2276,22 +2253,43 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2316,21 +2314,30 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResLo.getNode() == 0) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResHi.getNode() == 0) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } + // Propagate ordering to the last node, for now. + CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node)); + return NULL; } @@ -2397,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2429,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2529,7 +2534,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2560,8 +2571,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2577,7 +2593,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. - return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2593,7 +2615,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i32, Reg); // Emit a testl. - return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } } break; @@ -2639,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), - MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::Other, Ops); Result->setMemRefs(MemOp, MemOp + 2); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); @@ -2648,85 +2675,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node);