X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelDAGToDAG.cpp;h=e61632c9adab5c62cd8262d80e4b9955138f4c7a;hb=6465265ae1b9d2d9ffb6cfcebf4b46ad97914aa7;hp=d1ee070381e207c2da61dc86c862c088c0678915;hpb=eeb3a00b84b7767d236ec8cf0619b9217fc247b9;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d1ee070381e..e61632c9ada 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,15 +12,6 @@ // //===----------------------------------------------------------------------===// -// Force NDEBUG on in any optimized build on Darwin. -// -// FIXME: This is a huge hack, to work around ridiculously awful compile times -// on this file with gcc-4.2 on Darwin, in Release mode. -#if (!defined(__llvm__) && defined(__APPLE__) && \ - defined(__OPTIMIZE__) && !defined(NDEBUG)) -#define NDEBUG -#endif - #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -113,42 +104,57 @@ namespace { } void dump() { - errs() << "X86ISelAddressMode " << this << '\n'; - errs() << "Base.Reg "; + dbgs() << "X86ISelAddressMode " << this << '\n'; + dbgs() << "Base.Reg "; if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); else - errs() << "nul"; - errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << "nul"; + dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - errs() << "nul"; - errs() << " Disp " << Disp << '\n' + dbgs() << "nul"; + dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) GV->dump(); else - errs() << "nul"; - errs() << " CP "; + dbgs() << "nul"; + dbgs() << " CP "; if (CP) CP->dump(); else - errs() << "nul"; - errs() << '\n' + dbgs() << "nul"; + dbgs() << '\n' << "ES "; if (ES) - errs() << ES; + dbgs() << ES; else - errs() << "nul"; - errs() << " JT" << JT << " Align" << Align << '\n'; + dbgs() << "nul"; + dbgs() << " JT" << JT << " Align" << Align << '\n'; } }; } namespace { + class X86ISelListener : public SelectionDAG::DAGUpdateListener { + SmallSet Deletes; + public: + explicit X86ISelListener() {} + virtual void NodeDeleted(SDNode *N, SDNode *E) { + Deletes.insert(N); + } + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } + bool IsDeleted(SDNode *N) { + return Deletes.count(N); + } + }; + //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -177,14 +183,11 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); + virtual void EmitFunctionEntryCode(); - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual void PreprocessISelDAG(); // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -199,6 +202,7 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, @@ -208,18 +212,17 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, - SDValue N, SDValue &Base, SDValue &Scale, + bool SelectScalarSSELoad(SDNode *Root, SDValue N, + SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, SDValue &OutChain); + SDValue &NodeWithChain); + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - void PreprocessForRMW(); - void PreprocessForFPConvert(); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -295,19 +298,22 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } - -#ifndef NDEBUG - unsigned Indent; -#endif }; } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,70 +360,22 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The -/// chain produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. -/// -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (N.getOpcode() == ISD::BIT_CONVERT) - N = N.getOperand(0); - - LoadSDNode *LD = dyn_cast(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { - Load = N; - return true; } - return false; + + return true; } -/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain -/// operand and move load below the call's chain operand. -static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, - SDValue Call, SDValue CallSeqStart) { +/// MoveBelowCallOrigChain - Replace the original chain operand of the call with +/// load's chain operand and move load below the call's chain operand. +static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, + SDValue Call, SDValue OrigChain) { SmallVector Ops; - SDValue Chain = CallSeqStart.getOperand(0); + SDValue Chain = OrigChain.getOperand(0); if (Chain.getNode() == Load.getNode()) Ops.push_back(Load.getOperand(0)); else { assert(Chain.getOpcode() == ISD::TokenFactor && - "Unexpected CallSeqStart chain operand"); + "Unexpected chain operand"); for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) if (Chain.getOperand(i).getNode() == Load.getNode()) Ops.push_back(Load.getOperand(0)); @@ -429,9 +387,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, Ops.clear(); Ops.push_back(NewChain); } - for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i) - Ops.push_back(CallSeqStart.getOperand(i)); - CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size()); + for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) + Ops.push_back(OrigChain.getOperand(i)); + CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size()); CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); Ops.clear(); @@ -444,7 +402,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, /// isCalleeLoad - Return true if call address is a load and it can be /// moved below CALLSEQ_START and the chains leading up to the call. /// Return the CALLSEQ_START by reference as a second output. -static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { +/// In the case of a tail call, there isn't a callseq node between the call +/// chain and the load. +static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) return false; LoadSDNode *LD = dyn_cast(Callee.getNode()); @@ -455,12 +415,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; // Now let's find the callseq_start. - while (Chain.getOpcode() != ISD::CALLSEQ_START) { + while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { if (!Chain.hasOneUse()) return false; Chain = Chain.getOperand(0); } - + + if (!Chain.getNumOperands()) + return false; if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && @@ -470,51 +432,17 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; } - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -void X86DAGToDAGISel::PreprocessForRMW() { +void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (I->getOpcode() == X86ISD::CALL) { + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (OptLevel != CodeGenOpt::None && + (N->getOpcode() == X86ISD::CALL || + N->getOpcode() == X86ISD::TC_RETURN)) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -534,84 +462,24 @@ void X86DAGToDAGISel::PreprocessForRMW() { /// \ / /// \ / /// [CALL] - SDValue Chain = I->getOperand(0); - SDValue Load = I->getOperand(1); - if (!isCalleeLoad(Load, Chain)) + bool HasCallSeq = N->getOpcode() == X86ISD::CALL; + SDValue Chain = N->getOperand(0); + SDValue Load = N->getOperand(1); + if (!isCalleeLoad(Load, Chain, HasCallSeq)) continue; - MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } - - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - bool RModW = false; - SDValue Load; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } - } - - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; - } - } -} - - -/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend -/// nodes that target the FP stack to be store and load to the stack. This is a -/// gross hack. We would like to simply mark these as being illegal, but when -/// we do that, legalize produces these when it expands calls, then expands -/// these in the same legalize pass. We would like dag combine to be able to -/// hack on these between the call expansion and the node legalization. As such -/// this pass basically does "really late" legalization of these inline with the -/// X86 isel pass. -void X86DAGToDAGISel::PreprocessForFPConvert() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + // Lower fpround and fpextend nodes that target the FP stack to be store and + // load to the stack. This is a gross hack. We would like to simply mark + // these as being illegal, but when we do that, legalize produces these when + // it expands calls, then expands these in the same legalize pass. We would + // like dag combine to be able to hack on these between the call expansion + // and the node legalization. As such this pass basically does "really + // late" legalization of these inline with the X86 isel pass. + // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; @@ -648,9 +516,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -666,30 +535,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { } } -/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel -/// when it has created a SelectionDAG for us to codegen. -void X86DAGToDAGISel::InstructionSelect() { - const Function *F = MF->getFunction(); - OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - - if (OptLevel != CodeGenOpt::None) - PreprocessForRMW(); - - // FIXME: This should only happen when not compiled with -O0. - PreprocessForFPConvert(); - - // Codegen the basic block. -#ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection begins:\n"); - Indent = 0; -#endif - SelectRoot(*CurDAG); -#ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection ends:\n"); -#endif - - CurDAG->RemoveDeadNodes(); -} /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. @@ -697,15 +542,15 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI) { const TargetInstrInfo *TII = TM.getInstrInfo(); if (Subtarget->isTargetCygMing()) - BuildMI(BB, DebugLoc::getUnknownLoc(), + BuildMI(BB, DebugLoc(), TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); } -void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { +void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. - MachineBasicBlock *BB = MF.begin(); - if (Fn.hasExternalLinkage() && Fn.getName() == "main") - EmitSpecialCodeForMain(BB, MF.getFrameInfo()); + if (const Function *Fn = MF->getFunction()) + if (Fn->hasExternalLinkage() && Fn->getName() == "main") + EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } @@ -822,7 +667,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - if (MatchAddressRecursively(N, AM, 0)) + X86ISelListener DeadNodes; + if (MatchAddressRecursively(N, AM, DeadNodes, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -851,11 +697,12 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ - errs() << "MatchAddress: "; + dbgs() << "MatchAddress: "; AM.dump(); }); // Limit recursion. @@ -940,7 +787,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. - if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() && + if (ShVal.getNode()->getOpcode() == ISD::ADD && isa(ShVal.getNode()->getOperand(1))) { AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = @@ -1016,7 +863,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1) || + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + DeadNodes.IsDeleted(N.getNode())) { AM = Backup; break; } @@ -1025,6 +876,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; break; } + int Cost = 0; SDValue RHS = N.getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this @@ -1078,13 +930,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::ADD: { X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } + + // Try again after commuting the operands. AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } AM = Backup; // If we couldn't fold both operands into the address at the same time, @@ -1106,16 +978,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { X86ISelAddressMode Backup = AM; uint64_t Offset = CN->getSExtValue(); + + // Check to see if the LHS & C is zero. + if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) + break; + // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. (!is64Bit || X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement())) && - // Check to see if the LHS & C is zero. - CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.hasSymbolicDisplacement()))) { AM.Disp += Offset; return false; } @@ -1186,7 +1061,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, CurDAG->RepositionNode(N.getNode(), Shl.getNode()); Shl.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, Shl); + CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); return false; @@ -1237,7 +1112,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, NewSHIFT); + CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes); AM.Scale = 1 << ShiftCst; AM.IndexReg = NewAND; @@ -1296,22 +1171,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +/// +/// We also return: +/// PatternChainNode: this is the matched node that has a chain input and +/// output. +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, - SDValue &OutChain) { + SDValue &PatternNodeWithChain) { if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { - InChain = N.getOperand(0).getValue(1); - if (ISD::isNON_EXTLoad(InChain.getNode()) && - InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { - LoadSDNode *LD = cast(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + PatternNodeWithChain = N.getOperand(0); + if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && + PatternNodeWithChain.hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + LoadSDNode *LD = cast(PatternNodeWithChain); + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; - OutChain = LD->getChain(); return true; } } @@ -1323,13 +1200,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; - OutChain = LD->getChain(); - InChain = SDValue(LD, 1); + PatternNodeWithChain = SDValue(LD, 0); return true; } return false; @@ -1403,7 +1281,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast(N); @@ -1430,11 +1307,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); - return false; + if (!ISD::isNON_EXTLoad(N.getNode()) || + !IsProfitableToFold(N, P, P) || + !IsLegalToFold(N, P, P)) + return false; + + return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1537,7 +1415,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1545,7 +1423,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1560,7 +1438,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1568,7 +1446,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1584,7 +1462,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; @@ -1592,7 +1470,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; @@ -1602,7 +1480,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } DebugLoc dl = Node->getDebugLoc(); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); @@ -1648,8 +1526,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: @@ -1689,24 +1567,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent, ' ') << "Selecting: "; - Node->dump(CurDAG); - errs() << '\n'; - }); - Indent += 2; -#endif + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent-2, ' ') << "== "; - Node->dump(CurDAG); - errs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return NULL; // Already selected. } @@ -1802,13 +1666,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - errs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1831,19 +1689,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - errs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -1873,7 +1721,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned LoReg, HiReg, ClrReg; unsigned ClrOpcode, SExtOpcode; - EVT ClrVT = NVT; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: @@ -1883,7 +1730,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32; + ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: @@ -1893,7 +1740,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = ~0U; // NOT USED. + ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } @@ -1932,24 +1779,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode; - - if (NVT.getSimpleVT() == MVT::i64) { - ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), - 0); - // We just did a 32-bit clear, insert it into a 64-bit register to - // clear the whole 64-bit reg. - SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); - SDValue SubRegNo = - CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); - ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, - MVT::i64, Zero, ClrNode, SubRegNo), - 0); - } else { - ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0); - } - + SDValue ClrNode = + SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -1975,13 +1806,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - errs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -2005,19 +1830,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - errs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -2130,17 +1944,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); -#ifndef NDEBUG - DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - errs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); return ResNode; }