X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelDAGToDAG.cpp;h=7c973c2e55db38b711f1357fddf788ebab31e940;hb=875710a2fd6b3c4f814961582594bd5c1cdb493a;hp=d836c29d6bd8c3f1ccf44cc27fdad18ab8fc9a01;hpb=0767dc546ec94f46b5c4292645dbf096fe3100cf;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d836c29d6bd..7c973c2e55d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,33 +12,32 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; +#define DEBUG_TYPE "x86-isel" + STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -73,16 +72,18 @@ namespace { X86ISelAddressMode() : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), - Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), - SymbolFlags(X86II::MO_NO_FLAG) { + Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), + JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) { } bool hasSymbolicDisplacement() const { - return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; + return GV != nullptr || CP != nullptr || ES != nullptr || + JT != -1 || BlockAddr != nullptr; } bool hasBaseOrIndexReg() const { - return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; + return BaseType == FrameIndexBase || + IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; } /// isRIPRelative - Return true if this addressing mode is already RIP @@ -104,14 +105,14 @@ namespace { void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; - if (Base_Reg.getNode() != 0) + if (Base_Reg.getNode()) Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; - if (IndexReg.getNode() != 0) + if (IndexReg.getNode()) IndexReg.getNode()->dump(); else dbgs() << "nul"; @@ -143,11 +144,7 @@ namespace { /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. /// - class X86DAGToDAGISel : public SelectionDAGISel { - /// X86Lowering - This object fully describes how to lower LLVM code to an - /// X86-specific SelectionDAG. - const X86TargetLowering &X86Lowering; - + class X86DAGToDAGISel final : public SelectionDAGISel { /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; @@ -159,19 +156,25 @@ namespace { public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), - X86Lowering(*tm.getTargetLowering()), Subtarget(&tm.getSubtarget()), OptForSize(false) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "X86 DAG->DAG Instruction Selection"; } - virtual void EmitFunctionEntryCode(); + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + Subtarget = &TM.getSubtarget(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } - virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + void EmitFunctionEntryCode() override; - virtual void PreprocessISelDAG(); + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; + + void PreprocessISelDAG() override; inline bool immSext8(SDNode *N) const { return isInt<8>(cast(N)->getSExtValue()); @@ -188,11 +191,10 @@ namespace { #include "X86GenDAGISel.inc" private: - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); - SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); - SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); + SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT); bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); @@ -204,9 +206,13 @@ namespace { bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + bool SelectMOV64Imm32(SDValue N, SDValue &Imm); bool SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + bool SelectLEA64_32Addr(SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -223,9 +229,9 @@ namespace { /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) override; void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); @@ -233,14 +239,15 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, + getTargetLowering()->getPointerTy()) : AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; // These are 32-bit even in 64-bit mode since RIP relative offset // is 32-bit. if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(), + Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), MVT::i32, AM.Disp, AM.SymbolFlags); else if (AM.CP) @@ -284,14 +291,14 @@ namespace { /// getTargetMachine - Return a reference to the TargetMachine, casted /// to the target-specific type. - const X86TargetMachine &getTargetMachine() { + const X86TargetMachine &getTargetMachine() const { return static_cast(TM); } /// getInstrInfo - Return a reference to the TargetInstrInfo, casted /// to the target-specific type. - const X86InstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); + const X86InstrInfo *getInstrInfo() const { + return getTargetMachine().getSubtargetImpl()->getInstrInfo(); } }; } @@ -347,7 +354,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { // addl %gs:0, %eax // if the block also has an access to a second TLS address this will save // a load. - // FIXME: This is probably also true for non TLS addresses. + // FIXME: This is probably also true for non-TLS addresses. if (Op1.getOpcode() == X86ISD::Wrapper) { SDValue Val = Op1.getOperand(0); if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) @@ -363,7 +370,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { /// MoveBelowCallOrigChain - Replace the original chain operand of the call with /// load's chain operand and move load below the call's chain operand. static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, - SDValue Call, SDValue OrigChain) { + SDValue Call, SDValue OrigChain) { SmallVector Ops; SDValue Chain = OrigChain.getOperand(0); if (Chain.getNode() == Load.getNode()) @@ -377,21 +384,22 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, else Ops.push_back(Chain.getOperand(i)); SDValue NewChain = - CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(), - MVT::Other, &Ops[0], Ops.size()); + CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); Ops.clear(); Ops.push_back(NewChain); } for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) Ops.push_back(OrigChain.getOperand(i)); - CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); + + unsigned NumOps = Call.getNode()->getNumOperands(); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); - for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) + for (unsigned i = 1, e = NumOps; i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Call.getNode(), Ops); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -400,6 +408,10 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, /// In the case of a tail call, there isn't a callseq node between the call /// chain and the load. static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { + // The transformation is somewhat dangerous if the call's chain was glued to + // the call. After MoveBelowOrigChain the load is moved between the call and + // the chain, this can create a cycle if the load is not folded. So it is + // *really* important that we are sure the load will be folded. if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) return false; LoadSDNode *LD = dyn_cast(Callee.getNode()); @@ -418,6 +430,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { if (!Chain.getNumOperands()) return false; + // Since we are not checking for AA here, conservatively abort if the chain + // writes to memory. It's not safe to move the callee (a load) across a store. + if (isa(Chain.getNode()) && + cast(Chain.getNode())->writeMem()) + return false; if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && @@ -429,15 +446,21 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - (N->getOpcode() == X86ISD::CALL || - N->getOpcode() == X86ISD::TC_RETURN)) { + // Only does this when target favors doesn't favor register indirect + // call. + ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || + (N->getOpcode() == X86ISD::TC_RETURN && + // Only does this if load can be folded into TC_RETURN. + (Subtarget->is64Bit() || + getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -478,8 +501,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - EVT SrcVT = N->getOperand(0).getValueType(); - EVT DstVT = N->getValueType(0); + MVT SrcVT = N->getOperand(0).getSimpleValueType(); + MVT DstVT = N->getSimpleValueType(0); // If any of the sources are vectors, no fp stack involved. if (SrcVT.isVector() || DstVT.isVector()) @@ -487,8 +510,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // If the source and destination are SSE registers, then this is a legal // conversion that should not be lowered. - bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); - bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); + const X86TargetLowering *X86Lowering = + static_cast(getTargetLowering()); + bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); + bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) continue; @@ -504,14 +529,14 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. - EVT MemVT; + MVT MemVT; if (N->getOpcode() == ISD::FP_ROUND) MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else MemVT = SrcIsSSE ? SrcVT : DstVT; SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, @@ -520,7 +545,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, MachinePointerInfo(), - MemVT, false, false, 0); + MemVT, false, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -541,7 +566,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { /// the main function. void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI) { - const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); if (Subtarget->isTargetCygMing()) { unsigned CallOp = Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; @@ -596,7 +621,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // gs:0 (or fs:0 on X86-64) contains its own address. // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast(Address)) - if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && + if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr && Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: @@ -717,7 +742,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0) { + AM.Base_Reg.getNode() == nullptr) { AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } @@ -729,8 +754,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { Subtarget->is64Bit() && AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0 && + AM.Base_Reg.getNode() == nullptr && + AM.IndexReg.getNode() == nullptr && AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); @@ -768,8 +793,8 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, Mask != (0xffu << ScaleLog)) return true; - EVT VT = N.getValueType(); - DebugLoc DL = N.getDebugLoc(); + MVT VT = N.getSimpleValueType(); + SDLoc DL(N); SDValue Eight = DAG.getConstant(8, MVT::i8); SDValue NewMask = DAG.getConstant(0xff, VT); SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight); @@ -816,8 +841,8 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) return true; - EVT VT = N.getValueType(); - DebugLoc DL = N.getDebugLoc(); + MVT VT = N.getSimpleValueType(); + SDLoc DL(N); SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); @@ -873,8 +898,8 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, return true; unsigned ShiftAmt = Shift.getConstantOperandVal(1); - unsigned MaskLZ = CountLeadingZeros_64(Mask); - unsigned MaskTZ = CountTrailingZeros_64(Mask); + unsigned MaskLZ = countLeadingZeros(Mask); + unsigned MaskTZ = countTrailingZeros(Mask); // The amount of shift we're trying to fit into the addressing mode is taken // from the trailing zeros of the mask. @@ -889,7 +914,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // Scale the leading zero count down based on the actual size of the value. // Also scale it down based on the size of the shift. - MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt; + MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; // The final check is to ensure that any masked out high bits of X are // already known to be zero. Otherwise, the mask has a semantic impact @@ -899,31 +924,31 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // replace them with zero extensions cheaply if necessary. bool ReplacingAnyExtend = false; if (X.getOpcode() == ISD::ANY_EXTEND) { - unsigned ExtendBits = - X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits(); + unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - + X.getOperand(0).getSimpleValueType().getSizeInBits(); // Assume that we'll replace the any-extend with a zero-extend, and // narrow the search to the extended value. X = X.getOperand(0); MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; ReplacingAnyExtend = true; } - APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), - MaskLZ); + APInt MaskedHighBits = + APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(X, KnownZero, KnownOne); + DAG.computeKnownBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift // and an addressing mode. Make it so. - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); if (ReplacingAnyExtend) { assert(X.getValueType() != VT); // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. - SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X); + SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); InsertDAGNode(DAG, N, NewX); X = NewX; } - DebugLoc DL = N.getDebugLoc(); + SDLoc DL(N); SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8); SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8); @@ -947,7 +972,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { - DebugLoc dl = N.getDebugLoc(); + SDLoc dl(N); DEBUG({ dbgs() << "MatchAddress: "; AM.dump(); @@ -993,7 +1018,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::FrameIndex: if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && + AM.Base_Reg.getNode() == nullptr && (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; AM.Base_FrameIndex = cast(N)->getIndex(); @@ -1002,7 +1027,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::SHL: - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; if (ConstantSDNode @@ -1031,12 +1056,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal; return false; } - break; } + break; case ISD::SRL: { // Scale must not be used already. - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; SDValue And = N.getOperand(0); if (And.getOpcode() != ISD::AND) break; @@ -1044,7 +1069,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // We only handle up to 64-bit values here as those are what matter for // addressing mode optimizations. - if (X.getValueSizeInBits() > 64) break; + if (X.getSimpleValueType().getSizeInBits() > 64) break; // The mask used for the transform is expected to be post-shift, but we // found the shift first so just apply the shift to the mask before passing @@ -1070,8 +1095,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case X86ISD::MUL_IMM: // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0) { + AM.Base_Reg.getNode() == nullptr && + AM.IndexReg.getNode() == nullptr) { if (ConstantSDNode *CN = dyn_cast(N.getNode()->getOperand(1))) if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || @@ -1221,7 +1246,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // with a constant to enable use of the scaled offset field. // Scale must not be used already. - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; SDValue Shift = N.getOperand(0); if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break; @@ -1229,7 +1254,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // We only handle up to 64-bit values here as those are what matter for // addressing mode optimizations. - if (X.getValueSizeInBits() > 64) break; + if (X.getSimpleValueType().getSizeInBits() > 64) break; if (!isa(N.getOperand(1))) break; @@ -1260,7 +1285,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. - if (AM.IndexReg.getNode() == 0) { + if (!AM.IndexReg.getNode()) { AM.IndexReg = N; AM.Scale = 1; return false; @@ -1293,7 +1318,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, // that are not a MemSDNode, and thus don't have proper addrspace info. Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores - Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme + Parent->getOpcode() != X86ISD::TLSCALL && // Fixme + Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp + Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = cast(Parent)->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS. @@ -1306,7 +1333,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (MatchAddress(N, AM)) return false; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base_Reg.getNode()) AM.Base_Reg = CurDAG->getRegister(0, VT); @@ -1365,6 +1392,71 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, } +bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { + if (const ConstantSDNode *CN = dyn_cast(N)) { + uint64_t ImmVal = CN->getZExtValue(); + if ((uint32_t)ImmVal != (uint64_t)ImmVal) + return false; + + Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64); + return true; + } + + // In static codegen with small code model, we can get the address of a label + // into a register with 'movl'. TableGen has already made sure we're looking + // at a label of some kind. + assert(N->getOpcode() == X86ISD::Wrapper && + "Unexpected node type for MOV32ri64"); + N = N.getOperand(0); + + if (N->getOpcode() != ISD::TargetConstantPool && + N->getOpcode() != ISD::TargetJumpTable && + N->getOpcode() != ISD::TargetGlobalAddress && + N->getOpcode() != ISD::TargetExternalSymbol && + N->getOpcode() != ISD::TargetBlockAddress) + return false; + + Imm = N; + return TM.getCodeModel() == CodeModel::Small; +} + +bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp, SDValue &Segment) { + if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment)) + return false; + + SDLoc DL(N); + RegisterSDNode *RN = dyn_cast(Base); + if (RN && RN->getReg() == 0) + Base = CurDAG->getRegister(0, MVT::i64); + else if (Base.getValueType() == MVT::i32 && !dyn_cast(N)) { + // Base could already be %rip, particularly in the x32 ABI. + Base = SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), + Base, + CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), + 0); + } + + RN = dyn_cast(Index); + if (RN && RN->getReg() == 0) + Index = CurDAG->getRegister(0, MVT::i64); + else { + assert(Index.getValueType() == MVT::i32 && + "Expect to be extending 32-bit registers for use in LEA"); + Index = SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), + Index, + CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), + 0); + } + + return true; +} + /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, @@ -1383,7 +1475,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, assert (T == AM.Segment); AM.Segment = Copy; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) if (AM.Base_Reg.getNode()) @@ -1472,7 +1564,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy()).getNode(); } SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { @@ -1480,172 +1573,26 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In1 = Node->getOperand(1); SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), + MVT::i32, MVT::i32, MVT::Other, Ops); cast(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } -// FIXME: Figure out some way to unify this with the 'or' and other code -// below. -SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { - if (Node->hasAnyUseOfValue(0)) - return 0; - - // Optimize common patterns for __sync_add_and_fetch and - // __sync_sub_and_fetch where the result is not used. This allows us - // to use "lock" version of add, sub, inc, dec instructions. - // FIXME: Do not use special instructions but instead add the "lock" - // prefix to the target node somehow. The extra information will then be - // transferred to machine instruction and it denotes the prefix. - SDValue Chain = Node->getOperand(0); - SDValue Ptr = Node->getOperand(1); - SDValue Val = Node->getOperand(2); - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; - - bool isInc = false, isDec = false, isSub = false, isCN = false; - ConstantSDNode *CN = dyn_cast(Val); - if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) { - isCN = true; - int64_t CNVal = CN->getSExtValue(); - if (CNVal == 1) - isInc = true; - else if (CNVal == -1) - isDec = true; - else if (CNVal >= 0) - Val = CurDAG->getTargetConstant(CNVal, NVT); - else { - isSub = true; - Val = CurDAG->getTargetConstant(-CNVal, NVT); - } - } else if (Val.hasOneUse() && - Val.getOpcode() == ISD::SUB && - X86::isZeroNode(Val.getOperand(0))) { - isSub = true; - Val = Val.getOperand(1); - } - - DebugLoc dl = Node->getDebugLoc(); - unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { - default: return 0; - case MVT::i8: - if (isInc) - Opc = X86::LOCK_INC8m; - else if (isDec) - Opc = X86::LOCK_DEC8m; - else if (isSub) { - if (isCN) - Opc = X86::LOCK_SUB8mi; - else - Opc = X86::LOCK_SUB8mr; - } else { - if (isCN) - Opc = X86::LOCK_ADD8mi; - else - Opc = X86::LOCK_ADD8mr; - } - break; - case MVT::i16: - if (isInc) - Opc = X86::LOCK_INC16m; - else if (isDec) - Opc = X86::LOCK_DEC16m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB16mi8; - else - Opc = X86::LOCK_SUB16mi; - } else - Opc = X86::LOCK_SUB16mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD16mi8; - else - Opc = X86::LOCK_ADD16mi; - } else - Opc = X86::LOCK_ADD16mr; - } - break; - case MVT::i32: - if (isInc) - Opc = X86::LOCK_INC32m; - else if (isDec) - Opc = X86::LOCK_DEC32m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB32mi8; - else - Opc = X86::LOCK_SUB32mi; - } else - Opc = X86::LOCK_SUB32mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD32mi8; - else - Opc = X86::LOCK_ADD32mi; - } else - Opc = X86::LOCK_ADD32mr; - } - break; - case MVT::i64: - if (isInc) - Opc = X86::LOCK_INC64m; - else if (isDec) - Opc = X86::LOCK_DEC64m; - else if (isSub) { - Opc = X86::LOCK_SUB64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_SUB64mi32; - } - } else { - Opc = X86::LOCK_ADD64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_ADD64mi32; - } - } - break; - } - - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(Node)->getMemOperand(); - if (isInc || isDec) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); - cast(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } else { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); - cast(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } -} - +/// Atomic opcode table +/// enum AtomicOpc { + ADD, + SUB, + INC, + DEC, OR, AND, XOR, @@ -1668,6 +1615,58 @@ enum AtomicSz { }; static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { + { + X86::LOCK_ADD8mi, + X86::LOCK_ADD8mr, + X86::LOCK_ADD16mi8, + X86::LOCK_ADD16mi, + X86::LOCK_ADD16mr, + X86::LOCK_ADD32mi8, + X86::LOCK_ADD32mi, + X86::LOCK_ADD32mr, + X86::LOCK_ADD64mi8, + X86::LOCK_ADD64mi32, + X86::LOCK_ADD64mr, + }, + { + X86::LOCK_SUB8mi, + X86::LOCK_SUB8mr, + X86::LOCK_SUB16mi8, + X86::LOCK_SUB16mi, + X86::LOCK_SUB16mr, + X86::LOCK_SUB32mi8, + X86::LOCK_SUB32mi, + X86::LOCK_SUB32mr, + X86::LOCK_SUB64mi8, + X86::LOCK_SUB64mi32, + X86::LOCK_SUB64mr, + }, + { + 0, + X86::LOCK_INC8m, + 0, + 0, + X86::LOCK_INC16m, + 0, + 0, + X86::LOCK_INC32m, + 0, + 0, + X86::LOCK_INC64m, + }, + { + 0, + X86::LOCK_DEC8m, + 0, + 0, + X86::LOCK_DEC16m, + 0, + 0, + X86::LOCK_DEC32m, + 0, + 0, + X86::LOCK_DEC64m, + }, { X86::LOCK_OR8mi, X86::LOCK_OR8mr, @@ -1679,7 +1678,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_OR32mr, X86::LOCK_OR64mi8, X86::LOCK_OR64mi32, - X86::LOCK_OR64mr + X86::LOCK_OR64mr, }, { X86::LOCK_AND8mi, @@ -1692,7 +1691,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_AND32mr, X86::LOCK_AND64mi8, X86::LOCK_AND64mi32, - X86::LOCK_AND64mr + X86::LOCK_AND64mr, }, { X86::LOCK_XOR8mi, @@ -1705,28 +1704,86 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_XOR32mr, X86::LOCK_XOR64mi8, X86::LOCK_XOR64mi32, - X86::LOCK_XOR64mr + X86::LOCK_XOR64mr, } }; -SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { +// Return the target constant operand for atomic-load-op and do simple +// translations, such as from atomic-load-add to lock-sub. The return value is +// one of the following 3 cases: +// + target-constant, the operand could be supported as a target constant. +// + empty, the operand is not needed any more with the new op selected. +// + non-empty, otherwise. +static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, + SDLoc dl, + enum AtomicOpc &Op, MVT NVT, + SDValue Val) { + if (ConstantSDNode *CN = dyn_cast(Val)) { + int64_t CNVal = CN->getSExtValue(); + // Quit if not 32-bit imm. + if ((int32_t)CNVal != CNVal) + return Val; + // For atomic-load-add, we could do some optimizations. + if (Op == ADD) { + // Translate to INC/DEC if ADD by 1 or -1. + if ((CNVal == 1) || (CNVal == -1)) { + Op = (CNVal == 1) ? INC : DEC; + // No more constant operand after being translated into INC/DEC. + return SDValue(); + } + // Translate to SUB if ADD by negative value. + if (CNVal < 0) { + Op = SUB; + CNVal = -CNVal; + } + } + return CurDAG->getTargetConstant(CNVal, NVT); + } + + // If the value operand is single-used, try to optimize it. + if (Op == ADD && Val.hasOneUse()) { + // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). + if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { + Op = SUB; + return Val.getOperand(1); + } + // A special case for i16, which needs truncating as, in most cases, it's + // promoted to i32. We will translate + // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) + if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && + Val.getOperand(0).getOpcode() == ISD::SUB && + X86::isZeroNode(Val.getOperand(0).getOperand(0))) { + Op = SUB; + Val = Val.getOperand(0); + return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, + Val.getOperand(1)); + } + } + + return Val; +} + +SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { if (Node->hasAnyUseOfValue(0)) - return 0; + return nullptr; + + SDLoc dl(Node); // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. - // FIXME: Same as for 'add' and 'sub', try to merge those down here. SDValue Chain = Node->getOperand(0); SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; + return nullptr; // Which index into the table. enum AtomicOpc Op; switch (Node->getOpcode()) { + default: + return nullptr; case ISD::ATOMIC_LOAD_OR: Op = OR; break; @@ -1736,20 +1793,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { case ISD::ATOMIC_LOAD_XOR: Op = XOR; break; - default: - return 0; + case ISD::ATOMIC_LOAD_ADD: + Op = ADD; + break; } - bool isCN = false; - ConstantSDNode *CN = dyn_cast(Val); - if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { - isCN = true; - Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); - } + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); + bool isUnOp = !Val.getNode(); + bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { - default: return 0; + switch (NVT.SimpleTy) { + default: return nullptr; case MVT::i8: if (isCN) Opc = AtomicOpcTbl[Op][ConstantI8]; @@ -1787,16 +1842,21 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { assert(Opc != 0 && "Invalid arith lock transform!"); - DebugLoc dl = Node->getDebugLoc(); + SDValue Ret; SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); + if (isUnOp) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); + } else { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); + } cast(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + return CurDAG->getMergeValues(RetVals, dl).getNode(); } /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has @@ -1938,8 +1998,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, if (ChainCheck) // Make a new TokenFactor with all the other input chains except // for the load. - InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(), - MVT::Other, &ChainOps[0], ChainOps.size()); + InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), + MVT::Other, ChainOps); } if (!ChainCheck) return false; @@ -1976,7 +2036,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue VMask = Node->getOperand(5); ConstantSDNode *Scale = dyn_cast(Node->getOperand(6)); if (!Scale) - return 0; + return nullptr; SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), MVT::Other); @@ -1986,8 +2046,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -1998,16 +2057,17 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { } SDNode *X86DAGToDAGISel::Select(SDNode *Node) { - EVT NVT = Node->getValueType(0); + MVT NVT = Node->getSimpleValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); - return NULL; // Already selected. + Node->setNodeId(-1); + return nullptr; // Already selected. } switch (Opcode) { @@ -2032,6 +2092,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_avx2_gather_d_d_256: case Intrinsic::x86_avx2_gather_q_d: case Intrinsic::x86_avx2_gather_q_d_256: { + if (!Subtarget->hasAVX2()) + break; unsigned Opc; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); @@ -2055,7 +2117,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *RetVal = SelectGather(Node, Opc); if (RetVal) // We already called ReplaceUses inside SelectGather. - return NULL; + return nullptr; break; } } @@ -2065,39 +2127,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return getGlobalBaseReg(); - case X86ISD::ATOMOR64_DAG: - case X86ISD::ATOMXOR64_DAG: - case X86ISD::ATOMADD64_DAG: - case X86ISD::ATOMSUB64_DAG: - case X86ISD::ATOMNAND64_DAG: - case X86ISD::ATOMAND64_DAG: - case X86ISD::ATOMSWAP64_DAG: { - unsigned Opc; - switch (Opcode) { - default: llvm_unreachable("Impossible opcode"); - case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; - case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; - case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; - case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; - case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; - case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; - case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; - } - SDNode *RetVal = SelectAtomic64(Node, Opc); - if (RetVal) - return RetVal; - break; - } - - case ISD::ATOMIC_LOAD_ADD: { - SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); - if (RetVal) - return RetVal; - break; - } case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: { + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); if (RetVal) return RetVal; @@ -2133,7 +2166,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; unsigned ShlOp, Op; - EVT CstVT = NVT; + MVT CstVT = NVT; // Check the minimum bitwidth for the new constant. // TODO: AND32ri is the same as AND64ri32 with zext imm. @@ -2148,7 +2181,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (NVT == CstVT) break; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i32: assert(CstVT == MVT::i8); @@ -2185,7 +2218,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); unsigned LoReg; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; @@ -2198,12 +2231,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); - return NULL; + return nullptr; } case ISD::SMUL_LOHI: @@ -2212,16 +2245,19 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; } } else { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; @@ -2230,13 +2266,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2248,22 +2302,43 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2288,22 +2363,28 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (!ResLo.getNode()) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (!ResHi.getNode()) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } - return NULL; + return nullptr; } case ISD::SDIVREM: @@ -2313,7 +2394,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { bool isSigned = Opcode == ISD::SDIVREM; if (!isSigned) { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; @@ -2321,7 +2402,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; } } else { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; @@ -2331,27 +2412,24 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } unsigned LoReg, HiReg, ClrReg; - unsigned ClrOpcode, SExtOpcode; - switch (NVT.getSimpleVT().SimpleTy) { + unsigned SExtOpcode; + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; - ClrOpcode = 0; SExtOpcode = X86::CBW; break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; + ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; - ClrOpcode = X86::MOV32r0; SExtOpcode = X86::CDQ; break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } @@ -2369,8 +2447,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2390,8 +2467,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode = - SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); + SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); + switch (NVT.SimpleTy) { + case MVT::i16: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, + CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)), + 0); + break; + case MVT::i32: + break; + case MVT::i64: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), ClrNode, + CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)), + 0); + break; + default: + llvm_unreachable("Unexpected division source"); + } + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -2401,8 +2499,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2413,6 +2510,11 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Prevent use of AH in a REX instruction by referencing AX instead. // Shift it down 8 bits. + // + // The current assumption of the register allocator is that isel + // won't generate explicit references to the GPR8_NOREX registers. If + // the allocator and/or the backend get enhanced to be more robust in + // that regard, this can be, and should be, removed. if (HiReg == X86::AH && Subtarget->is64Bit() && !SDValue(Node, 1).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -2450,7 +2552,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - return NULL; + return nullptr; } case X86ISD::CMP: @@ -2486,7 +2588,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // On x86-32, only the ABCD registers have 8-bit subregisters. if (!Subtarget->is64Bit()) { const TargetRegisterClass *TRC; - switch (N0.getValueType().getSimpleVT().SimpleTy) { + switch (N0.getSimpleValueType().SimpleTy) { case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; default: llvm_unreachable("Unsupported TEST operand type!"); @@ -2501,7 +2603,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return nullptr; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2515,7 +2623,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Put the value in an ABCD register. const TargetRegisterClass *TRC; - switch (N0.getValueType().getSimpleVT().SimpleTy) { + switch (N0.getSimpleValueType().SimpleTy) { case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; @@ -2532,8 +2640,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return nullptr; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2549,7 +2662,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. - return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return nullptr; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2565,7 +2684,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i32, Reg); // Emit a testl. - return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return nullptr; } } break; @@ -2592,7 +2717,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue StoredVal = StoreNode->getOperand(1); unsigned Opc = StoredVal->getOpcode(); - LoadSDNode *LoadNode = 0; + LoadSDNode *LoadNode = nullptr; SDValue InputChain; if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, LoadNode, InputChain)) @@ -2610,9 +2735,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT LdVT = LoadNode->getMemoryVT(); unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, - Node->getDebugLoc(), - MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + SDLoc(Node), + MVT::i32, MVT::Other, Ops); Result->setMemRefs(MemOp, MemOp + 2); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); @@ -2620,91 +2744,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node); DEBUG(dbgs() << "=> "; - if (ResNode == NULL || ResNode == Node) + if (ResNode == nullptr || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); @@ -2722,7 +2767,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, case 'v': // not offsetable ?? default: return true; case 'm': // memory - if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4)) + if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; }