#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
private:
SDNode *Select(SDNode *N);
+ SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
- Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
TII->get(CallOp)).addExternalSymbol("__main");
}
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
- Subtarget->isTargetELF())
+ Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
- if (Subtarget->is64Bit() &&
+ if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- (M == CodeModel::Small || M == CodeModel::Kernel) &&
- // Base and index reg must be 0 in order to use %rip as base and lowering
- // must allow RIP.
- !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ // Base and index reg must be 0 in order to use %rip as base.
+ if (AM.hasBaseOrIndexReg())
+ return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
}
// Handle the case when globals fit in our immediate field: This is true for
- // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
- // mode, this results in a non-RIP-relative computation.
+ // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
+ // mode, this only applies to a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- ((M == CodeModel::Small || M == CodeModel::Kernel) &&
- TM.getRelocationModel() == Reloc::Static)) {
+ M == CodeModel::Small || M == CodeModel::Kernel) {
+ assert(N.getOpcode() != X86ISD::WrapperRIP &&
+ "RIP-relative addressing already handled");
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
return false;
}
+// Insert a node into the DAG at least before the Pos node's position. This
+// will reposition the node as needed, and will assign it a node ID that is <=
+// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
+// IDs! The selection DAG must no longer depend on their uniqueness when this
+// is used.
+static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
+ if (N.getNode()->getNodeId() == -1 ||
+ N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
+ DAG.RepositionNode(Pos.getNode(), N.getNode());
+ N.getNode()->setNodeId(Pos.getNode()->getNodeId());
+ }
+}
+
// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
// allows us to convert the shift and and into an h-register extract and
// a scaled index. Returns false if the simplification is performed.
SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
- // Insert the new nodes into the topological ordering.
- if (Eight.getNode()->getNodeId() == -1 ||
- Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- DAG.RepositionNode(X.getNode(), Eight.getNode());
- Eight.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (NewMask.getNode()->getNodeId() == -1 ||
- NewMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- DAG.RepositionNode(X.getNode(), NewMask.getNode());
- NewMask.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (Srl.getNode()->getNodeId() == -1 ||
- Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
- DAG.RepositionNode(Shift.getNode(), Srl.getNode());
- Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
- }
- if (And.getNode()->getNodeId() == -1 ||
- And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), And.getNode());
- And.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (ShlCount.getNode()->getNodeId() == -1 ||
- ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- DAG.RepositionNode(X.getNode(), ShlCount.getNode());
- ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Shl.getNode()->getNodeId() == -1 ||
- Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), Shl.getNode());
- Shl.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, Eight);
+ InsertDAGNode(DAG, N, Srl);
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, And);
+ InsertDAGNode(DAG, N, ShlCount);
+ InsertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
- // Insert the new nodes into the topological ordering.
- if (NewMask.getNode()->getNodeId() == -1 ||
- NewMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- DAG.RepositionNode(X.getNode(), NewMask.getNode());
- NewMask.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (NewAnd.getNode()->getNodeId() == -1 ||
- NewAnd.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
- DAG.RepositionNode(Shift.getNode(), NewAnd.getNode());
- NewAnd.getNode()->setNodeId(Shift.getNode()->getNodeId());
- }
- if (NewShift.getNode()->getNodeId() == -1 ||
- NewShift.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), NewShift.getNode());
- NewShift.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, NewAnd);
+ InsertDAGNode(DAG, N, NewShift);
DAG.ReplaceAllUsesWith(N, NewShift);
AM.Scale = 1 << ShiftAmt;
APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
MaskLZ);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
// We've identified a pattern that can be transformed into a single shift
assert(X.getValueType() != VT);
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X);
- if (NewX.getNode()->getNodeId() == -1 ||
- NewX.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), NewX.getNode());
- NewX.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ InsertDAGNode(DAG, N, NewX);
X = NewX;
}
DebugLoc DL = N.getDebugLoc();
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
- if (NewSRLAmt.getNode()->getNodeId() == -1 ||
- NewSRLAmt.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), NewSRLAmt.getNode());
- NewSRLAmt.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (NewSRL.getNode()->getNodeId() == -1 ||
- NewSRL.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), NewSRL.getNode());
- NewSRL.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (NewSHLAmt.getNode()->getNodeId() == -1 ||
- NewSHLAmt.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), NewSHLAmt.getNode());
- NewSHLAmt.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (NewSHL.getNode()->getNodeId() == -1 ||
- NewSHL.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- DAG.RepositionNode(N.getNode(), NewSHL.getNode());
- NewSHL.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewSRLAmt);
+ InsertDAGNode(DAG, N, NewSRL);
+ InsertDAGNode(DAG, N, NewSHLAmt);
+ InsertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
AM.Scale = 1 << AMShiftAmt;
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- if (Zero.getNode()->getNodeId() == -1 ||
- Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Zero.getNode());
- Zero.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Neg.getNode()->getNodeId() == -1 ||
- Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Neg.getNode());
- Neg.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ InsertDAGNode(*CurDAG, N, Zero);
+ InsertDAGNode(*CurDAG, N, Neg);
return false;
}
AtomicSzEnd
};
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
return true;
}
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for doing the {load; increment or decrement; store} to modify
+/// transformation.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+ SDValue StoredVal, SelectionDAG *CurDAG,
+ LoadSDNode* &LoadNode, SDValue &InputChain) {
+
+ // is the value stored the result of a DEC or INC?
+ if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+ // is the stored value result 0 of the load?
+ if (StoredVal.getResNo() != 0) return false;
+
+ // are there other uses of the loaded value than the inc or dec?
+ if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+ // is the store non-extending and non-indexed?
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+ return false;
+
+ SDValue Load = StoredVal->getOperand(0);
+ // Is the stored value a non-extending and non-indexed load?
+ if (!ISD::isNormalLoad(Load.getNode())) return false;
+
+ // Return LoadNode by reference.
+ LoadNode = cast<LoadSDNode>(Load);
+ // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ LdVT != MVT::i8)
+ return false;
+
+ // Is store the only read of the loaded value?
+ if (!Load.hasOneUse())
+ return false;
+
+ // Is the address of the store the same as the load?
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+ LoadNode->getOffset() != StoreNode->getOffset())
+ return false;
+
+ // Check if the chain is produced by the load or is a TokenFactor with
+ // the load output chain as an operand. Return InputChain by reference.
+ SDValue Chain = StoreNode->getChain();
+
+ bool ChainCheck = false;
+ if (Chain == Load.getValue(1)) {
+ ChainCheck = true;
+ InputChain = LoadNode->getChain();
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ SmallVector<SDValue, 4> ChainOps;
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+ SDValue Op = Chain.getOperand(i);
+ if (Op == Load.getValue(1)) {
+ ChainCheck = true;
+ continue;
+ }
+
+ // Make sure using Op as part of the chain would not cause a cycle here.
+ // In theory, we could check whether the chain node is a predecessor of
+ // the load. But that can be very expensive. Instead visit the uses and
+ // make sure they all have smaller node id than the load.
+ int LoadId = LoadNode->getNodeId();
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = UI->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != 0)
+ continue;
+ if (UI->getNodeId() > LoadId)
+ return false;
+ }
+
+ ChainOps.push_back(Op);
+ }
+
+ if (ChainCheck)
+ // Make a new TokenFactor with all the other input chains except
+ // for the load.
+ InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(),
+ MVT::Other, &ChainOps[0], ChainOps.size());
+ }
+ if (!ChainCheck)
+ return false;
+
+ return true;
+}
+
+/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
+/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+ if (Opc == X86ISD::DEC) {
+ if (LdVT == MVT::i64) return X86::DEC64m;
+ if (LdVT == MVT::i32) return X86::DEC32m;
+ if (LdVT == MVT::i16) return X86::DEC16m;
+ if (LdVT == MVT::i8) return X86::DEC8m;
+ } else {
+ assert(Opc == X86ISD::INC && "unrecognized opcode");
+ if (LdVT == MVT::i64) return X86::INC64m;
+ if (LdVT == MVT::i32) return X86::INC32m;
+ if (LdVT == MVT::i16) return X86::INC16m;
+ if (LdVT == MVT::i8) return X86::INC8m;
+ }
+ llvm_unreachable("unrecognized size for LdVT");
+}
+
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+ // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+ SDValue Chain = Node->getOperand(0);
+ SDValue VSrc = Node->getOperand(2);
+ SDValue Base = Node->getOperand(3);
+ SDValue VIdx = Node->getOperand(4);
+ SDValue VMask = Node->getOperand(5);
+ ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+ if (!Scale)
+ return 0;
+
+ SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
+ MVT::Other);
+
+ // Memory Operands: Base, Scale, Index, Disp, Segment
+ SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+ const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+ Disp, Segment, VMask, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ VTs, Ops, array_lengthof(Ops));
+ // Node has 2 outputs: VDst and MVT::Other.
+ // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
+ // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
+ // of ResNode.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
+ return ResNode;
+}
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_avx2_gather_d_pd:
+ case Intrinsic::x86_avx2_gather_d_pd_256:
+ case Intrinsic::x86_avx2_gather_q_pd:
+ case Intrinsic::x86_avx2_gather_q_pd_256:
+ case Intrinsic::x86_avx2_gather_d_ps:
+ case Intrinsic::x86_avx2_gather_d_ps_256:
+ case Intrinsic::x86_avx2_gather_q_ps:
+ case Intrinsic::x86_avx2_gather_q_ps_256:
+ case Intrinsic::x86_avx2_gather_d_q:
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ case Intrinsic::x86_avx2_gather_q_q:
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ case Intrinsic::x86_avx2_gather_d_d:
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ case Intrinsic::x86_avx2_gather_q_d:
+ case Intrinsic::x86_avx2_gather_q_d_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
+ case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+ case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
+ case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
+ case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
+ case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
+ case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
+ case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
+ case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
+ }
+ SDNode *RetVal = SelectGather(Node, Opc);
+ if (RetVal)
+ // We already called ReplaceUses inside SelectGather.
+ return NULL;
+ break;
+ }
+ }
+ break;
+ }
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+
case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
+ case X86ISD::ATOMSWAP64_DAG: {
+ unsigned Opc;
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
+ case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
+ case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
+ case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
+ case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+ case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+ }
+ SDNode *RetVal = SelectAtomic64(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
- break;
}
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
- N0, SDValue()).getValue(1);
+ N0, SDValue()).getValue(1);
if (foldedLoad) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
+ LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
// On x86-32, only the ABCD registers have 8-bit subregisters.
if (!Subtarget->is64Bit()) {
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
SDValue Reg = N0.getNode()->getOperand(0);
// Put the value in an ABCD register.
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
break;
}
case ISD::STORE: {
+ // Change a chain of {load; incr or dec; store} of the same value into
+ // a simple increment or decrement through memory of that value, if the
+ // uses of the modified value and its address are suitable.
// The DEC64m tablegen pattern is currently not able to match the case where
- // the EFLAGS on the original DEC are used.
- // we'll need to improve tablegen to allow flags to be transferred from a
+ // the EFLAGS on the original DEC are used. (This also applies to
+ // {INC,DEC}X{64,32,16,8}.)
+ // We'll need to improve tablegen to allow flags to be transferred from a
// node in the pattern to the result node. probably with a new keyword
// for example, we have this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (transferrable EFLAGS)]>;
+
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
- SDValue Chain = StoreNode->getOperand(0);
SDValue StoredVal = StoreNode->getOperand(1);
- SDValue Address = StoreNode->getOperand(2);
- SDValue Undef = StoreNode->getOperand(3);
-
- if (StoreNode->getMemOperand()->getSize() != 8 ||
- Undef->getOpcode() != ISD::UNDEF ||
- Chain->getOpcode() != ISD::LOAD ||
- StoredVal->getOpcode() != X86ISD::DEC ||
- StoredVal.getResNo() != 0 ||
- StoredVal->getOperand(0).getNode() != Chain.getNode())
- break;
-
- //OPC_CheckPredicate, 1, // Predicate_nontemporalstore
- if (StoreNode->isNonTemporal())
- break;
+ unsigned Opc = StoredVal->getOpcode();
- LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
- if (LoadNode->getOperand(1) != Address ||
- LoadNode->getOperand(2) != Undef)
+ LoadSDNode *LoadNode = 0;
+ SDValue InputChain;
+ if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
+ LoadNode, InputChain))
break;
- if (!ISD::isNormalLoad(LoadNode))
- break;
-
- if (!ISD::isNormalStore(StoreNode))
- break;
-
- // check load chain has only one use (from the store)
- if (!Chain.hasOneUse())
- break;
-
- // Merge the input chains if they are not intra-pattern references.
- SDValue InputChain = LoadNode->getOperand(0);
-
SDValue Base, Scale, Index, Disp, Segment;
if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
Base, Scale, Index, Disp, Segment))
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
- MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m,
+ EVT LdVT = LoadNode->getMemoryVT();
+ unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+ MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
MVT::i32, MVT::Other, Ops,
array_lengthof(Ops));
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}