lib/Target/X86/X86ISelDAGToDAG.cpp

   1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a DAG pattern matching instruction selector for X86,
  11 // converting from a legalized dag to a X86 dag.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "x86-isel"
  16 #include "X86.h"
  17 #include "X86InstrBuilder.h"
  18 #include "X86ISelLowering.h"
  19 #include "X86MachineFunctionInfo.h"
  20 #include "X86RegisterInfo.h"
  21 #include "X86Subtarget.h"
  22 #include "X86TargetMachine.h"
  23 #include "llvm/GlobalValue.h"
  24 #include "llvm/Instructions.h"
  25 #include "llvm/Intrinsics.h"
  26 #include "llvm/Support/CFG.h"
  27 #include "llvm/Type.h"
  28 #include "llvm/CodeGen/MachineConstantPool.h"
  29 #include "llvm/CodeGen/MachineFunction.h"
  30 #include "llvm/CodeGen/MachineFrameInfo.h"
  31 #include "llvm/CodeGen/MachineInstrBuilder.h"
  32 #include "llvm/CodeGen/MachineRegisterInfo.h"
  33 #include "llvm/CodeGen/SelectionDAGISel.h"
  34 #include "llvm/Target/TargetMachine.h"
  35 #include "llvm/Target/TargetOptions.h"
  36 #include "llvm/Support/Compiler.h"
  37 #include "llvm/Support/Debug.h"
  38 #include "llvm/Support/MathExtras.h"
  39 #include "llvm/Support/Streams.h"
  40 #include "llvm/ADT/SmallPtrSet.h"
  41 #include "llvm/ADT/Statistic.h"
  42 using namespace llvm;
  43
  44 #include "llvm/Support/CommandLine.h"
  45 static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
  46
  47 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
  48
  49 //===----------------------------------------------------------------------===//
  50 //                      Pattern Matcher Implementation
  51 //===----------------------------------------------------------------------===//
  52
  53 namespace {
  54   /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  55   /// SDValue's instead of register numbers for the leaves of the matched
  56   /// tree.
  57   struct X86ISelAddressMode {
  58     enum {
  59       RegBase,
  60       FrameIndexBase
  61     } BaseType;
  62
  63     struct {            // This is really a union, discriminated by BaseType!
  64       SDValue Reg;
  65       int FrameIndex;
  66     } Base;
  67
  68     bool isRIPRel;     // RIP as base?
  69     unsigned Scale;
  70     SDValue IndexReg;
  71     int32_t Disp;
  72     SDValue Segment;
  73     GlobalValue *GV;
  74     Constant *CP;
  75     const char *ES;
  76     int JT;
  77     unsigned Align;    // CP alignment.
  78
  79     X86ISelAddressMode()
  80       : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
  81         Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
  82     }
  83
  84     bool hasSymbolicDisplacement() const {
  85       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
  86     }
  87
  88     void dump() {
  89       cerr << "X86ISelAddressMode " << this << "\n";
  90       cerr << "Base.Reg ";
  91               if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
  92               else cerr << "nul";
  93       cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
  94       cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
  95       cerr << "IndexReg ";
  96               if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
  97               else cerr << "nul";
  98       cerr << " Disp " << Disp << "\n";
  99       cerr << "GV "; if (GV) GV->dump();
 100                      else cerr << "nul";
 101       cerr << " CP "; if (CP) CP->dump();
 102                      else cerr << "nul";
 103       cerr << "\n";
 104       cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
 105       cerr  << " JT" << JT << " Align" << Align << "\n";
 106     }
 107   };
 108 }
 109
 110 namespace {
 111   //===--------------------------------------------------------------------===//
 112   /// ISel - X86 specific code to select X86 machine instructions for
 113   /// SelectionDAG operations.
 114   ///
 115   class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
 116     /// X86Lowering - This object fully describes how to lower LLVM code to an
 117     /// X86-specific SelectionDAG.
 118     X86TargetLowering &X86Lowering;
 119
 120     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
 121     /// make the right decision when generating code for different targets.
 122     const X86Subtarget *Subtarget;
 123
 124     /// CurBB - Current BB being isel'd.
 125     ///
 126     MachineBasicBlock *CurBB;
 127
 128     /// OptForSize - If true, selector should try to optimize for code size
 129     /// instead of performance.
 130     bool OptForSize;
 131
 132   public:
 133     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
 134       : SelectionDAGISel(tm, OptLevel),
 135         X86Lowering(*tm.getTargetLowering()),
 136         Subtarget(&tm.getSubtarget<X86Subtarget>()),
 137         OptForSize(false) {}
 138
 139     virtual const char *getPassName() const {
 140       return "X86 DAG->DAG Instruction Selection";
 141     }
 142
 143     /// InstructionSelect - This callback is invoked by
 144     /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
 145     virtual void InstructionSelect();
 146
 147     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
 148
 149     virtual
 150       bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
 151
 152 // Include the pieces autogenerated from the target description.
 153 #include "X86GenDAGISel.inc"
 154
 155   private:
 156     SDNode *Select(SDValue N);
 157     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
 158
 159     bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
 160     bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
 161     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
 162     bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
 163                       unsigned Depth = 0);
 164     bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
 165     bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
 166                     SDValue &Scale, SDValue &Index, SDValue &Disp,
 167                     SDValue &Segment);
 168     bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
 169                        SDValue &Scale, SDValue &Index, SDValue &Disp);
 170     bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
 171                              SDValue N, SDValue &Base, SDValue &Scale,
 172                              SDValue &Index, SDValue &Disp,
 173                              SDValue &Segment,
 174                              SDValue &InChain, SDValue &OutChain);
 175     bool TryFoldLoad(SDValue P, SDValue N,
 176                      SDValue &Base, SDValue &Scale,
 177                      SDValue &Index, SDValue &Disp,
 178                      SDValue &Segment);
 179     void PreprocessForRMW();
 180     void PreprocessForFPConvert();
 181
 182     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 183     /// inline asm expressions.
 184     virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
 185                                               char ConstraintCode,
 186                                               std::vector<SDValue> &OutOps);
 187
 188     void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
 189
 190     inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
 191                                    SDValue &Scale, SDValue &Index,
 192                                    SDValue &Disp, SDValue &Segment) {
 193       Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
 194         CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
 195         AM.Base.Reg;
 196       Scale = getI8Imm(AM.Scale);
 197       Index = AM.IndexReg;
 198       // These are 32-bit even in 64-bit mode since RIP relative offset
 199       // is 32-bit.
 200       if (AM.GV)
 201         Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
 202       else if (AM.CP)
 203         Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
 204                                              AM.Align, AM.Disp);
 205       else if (AM.ES)
 206         Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
 207       else if (AM.JT != -1)
 208         Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
 209       else
 210         Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
 211
 212       if (AM.Segment.getNode())
 213         Segment = AM.Segment;
 214       else
 215         Segment = CurDAG->getRegister(0, MVT::i32);
 216     }
 217
 218     /// getI8Imm - Return a target constant with the specified value, of type
 219     /// i8.
 220     inline SDValue getI8Imm(unsigned Imm) {
 221       return CurDAG->getTargetConstant(Imm, MVT::i8);
 222     }
 223
 224     /// getI16Imm - Return a target constant with the specified value, of type
 225     /// i16.
 226     inline SDValue getI16Imm(unsigned Imm) {
 227       return CurDAG->getTargetConstant(Imm, MVT::i16);
 228     }
 229
 230     /// getI32Imm - Return a target constant with the specified value, of type
 231     /// i32.
 232     inline SDValue getI32Imm(unsigned Imm) {
 233       return CurDAG->getTargetConstant(Imm, MVT::i32);
 234     }
 235
 236     /// getGlobalBaseReg - Return an SDNode that returns the value of
 237     /// the global base register. Output instructions required to
 238     /// initialize the global base register, if necessary.
 239     ///
 240     SDNode *getGlobalBaseReg();
 241
 242     /// getTargetMachine - Return a reference to the TargetMachine, casted
 243     /// to the target-specific type.
 244     const X86TargetMachine &getTargetMachine() {
 245       return static_cast<const X86TargetMachine &>(TM);
 246     }
 247
 248     /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
 249     /// to the target-specific type.
 250     const X86InstrInfo *getInstrInfo() {
 251       return getTargetMachine().getInstrInfo();
 252     }
 253
 254 #ifndef NDEBUG
 255     unsigned Indent;
 256 #endif
 257   };
 258 }
 259
 260
 261 bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
 262                                                  SDNode *Root) const {
 263   if (OptLevel == CodeGenOpt::None) return false;
 264
 265   if (U == Root)
 266     switch (U->getOpcode()) {
 267     default: break;
 268     case ISD::ADD:
 269     case ISD::ADDC:
 270     case ISD::ADDE:
 271     case ISD::AND:
 272     case ISD::OR:
 273     case ISD::XOR: {
 274       SDValue Op1 = U->getOperand(1);
 275
 276       // If the other operand is a 8-bit immediate we should fold the immediate
 277       // instead. This reduces code size.
 278       // e.g.
 279       // movl 4(%esp), %eax
 280       // addl $4, %eax
 281       // vs.
 282       // movl $4, %eax
 283       // addl 4(%esp), %eax
 284       // The former is 2 bytes shorter. In case where the increment is 1, then
 285       // the saving can be 4 bytes (by using incl %eax).
 286       if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
 287         if (Imm->getAPIntValue().isSignedIntN(8))
 288           return false;
 289
 290       // If the other operand is a TLS address, we should fold it instead.
 291       // This produces
 292       // movl    %gs:0, %eax
 293       // leal    i@NTPOFF(%eax), %eax
 294       // instead of
 295       // movl    $i@NTPOFF, %eax
 296       // addl    %gs:0, %eax
 297       // if the block also has an access to a second TLS address this will save
 298       // a load.
 299       // FIXME: This is probably also true for non TLS addresses.
 300       if (Op1.getOpcode() == X86ISD::Wrapper) {
 301         SDValue Val = Op1.getOperand(0);
 302         if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
 303           return false;
 304       }
 305     }
 306     }
 307
 308   // Proceed to 'generic' cycle finder code
 309   return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
 310 }
 311
 312 /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
 313 /// and move load below the TokenFactor. Replace store's chain operand with
 314 /// load's chain result.
 315 static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
 316                                  SDValue Store, SDValue TF) {
 317   SmallVector<SDValue, 4> Ops;
 318   for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
 319     if (Load.getNode() == TF.getOperand(i).getNode())
 320       Ops.push_back(Load.getOperand(0));
 321     else
 322       Ops.push_back(TF.getOperand(i));
 323   CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
 324   CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
 325   CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
 326                              Store.getOperand(2), Store.getOperand(3));
 327 }
 328
 329 /// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
 330 ///
 331 static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
 332                       SDValue &Load) {
 333   if (N.getOpcode() == ISD::BIT_CONVERT)
 334     N = N.getOperand(0);
 335
 336   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
 337   if (!LD || LD->isVolatile())
 338     return false;
 339   if (LD->getAddressingMode() != ISD::UNINDEXED)
 340     return false;
 341
 342   ISD::LoadExtType ExtType = LD->getExtensionType();
 343   if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
 344     return false;
 345
 346   if (N.hasOneUse() &&
 347       N.getOperand(1) == Address &&
 348       N.getNode()->isOperandOf(Chain.getNode())) {
 349     Load = N;
 350     return true;
 351   }
 352   return false;
 353 }
 354
 355 /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
 356 /// operand and move load below the call's chain operand.
 357 static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
 358                                   SDValue Call, SDValue CallSeqStart) {
 359   SmallVector<SDValue, 8> Ops;
 360   SDValue Chain = CallSeqStart.getOperand(0);
 361   if (Chain.getNode() == Load.getNode())
 362     Ops.push_back(Load.getOperand(0));
 363   else {
 364     assert(Chain.getOpcode() == ISD::TokenFactor &&
 365            "Unexpected CallSeqStart chain operand");
 366     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
 367       if (Chain.getOperand(i).getNode() == Load.getNode())
 368         Ops.push_back(Load.getOperand(0));
 369       else
 370         Ops.push_back(Chain.getOperand(i));
 371     SDValue NewChain =
 372       CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
 373                       MVT::Other, &Ops[0], Ops.size());
 374     Ops.clear();
 375     Ops.push_back(NewChain);
 376   }
 377   for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
 378     Ops.push_back(CallSeqStart.getOperand(i));
 379   CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
 380   CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
 381                              Load.getOperand(1), Load.getOperand(2));
 382   Ops.clear();
 383   Ops.push_back(SDValue(Load.getNode(), 1));
 384   for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
 385     Ops.push_back(Call.getOperand(i));
 386   CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
 387 }
 388
 389 /// isCalleeLoad - Return true if call address is a load and it can be
 390 /// moved below CALLSEQ_START and the chains leading up to the call.
 391 /// Return the CALLSEQ_START by reference as a second output.
 392 static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
 393   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
 394     return false;
 395   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
 396   if (!LD ||
 397       LD->isVolatile() ||
 398       LD->getAddressingMode() != ISD::UNINDEXED ||
 399       LD->getExtensionType() != ISD::NON_EXTLOAD)
 400     return false;
 401
 402   // Now let's find the callseq_start.
 403   while (Chain.getOpcode() != ISD::CALLSEQ_START) {
 404     if (!Chain.hasOneUse())
 405       return false;
 406     Chain = Chain.getOperand(0);
 407   }
 408
 409   if (Chain.getOperand(0).getNode() == Callee.getNode())
 410     return true;
 411   if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
 412       Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
 413     return true;
 414   return false;
 415 }
 416
 417
 418 /// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
 419 /// This is only run if not in -O0 mode.
 420 /// This allows the instruction selector to pick more read-modify-write
 421 /// instructions. This is a common case:
 422 ///
 423 ///     [Load chain]
 424 ///         ^
 425 ///         |
 426 ///       [Load]
 427 ///       ^    ^
 428 ///       |    |
 429 ///      /      \-
 430 ///     /         |
 431 /// [TokenFactor] [Op]
 432 ///     ^          ^
 433 ///     |          |
 434 ///      \        /
 435 ///       \      /
 436 ///       [Store]
 437 ///
 438 /// The fact the store's chain operand != load's chain will prevent the
 439 /// (store (op (load))) instruction from being selected. We can transform it to:
 440 ///
 441 ///     [Load chain]
 442 ///         ^
 443 ///         |
 444 ///    [TokenFactor]
 445 ///         ^
 446 ///         |
 447 ///       [Load]
 448 ///       ^    ^
 449 ///       |    |
 450 ///       |     \-
 451 ///       |       |
 452 ///       |     [Op]
 453 ///       |       ^
 454 ///       |       |
 455 ///       \      /
 456 ///        \    /
 457 ///       [Store]
 458 void X86DAGToDAGISel::PreprocessForRMW() {
 459   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 460          E = CurDAG->allnodes_end(); I != E; ++I) {
 461     if (I->getOpcode() == X86ISD::CALL) {
 462       /// Also try moving call address load from outside callseq_start to just
 463       /// before the call to allow it to be folded.
 464       ///
 465       ///     [Load chain]
 466       ///         ^
 467       ///         |
 468       ///       [Load]
 469       ///       ^    ^
 470       ///       |    |
 471       ///      /      \--
 472       ///     /          |
 473       ///[CALLSEQ_START] |
 474       ///     ^          |
 475       ///     |          |
 476       /// [LOAD/C2Reg]   |
 477       ///     |          |
 478       ///      \        /
 479       ///       \      /
 480       ///       [CALL]
 481       SDValue Chain = I->getOperand(0);
 482       SDValue Load  = I->getOperand(1);
 483       if (!isCalleeLoad(Load, Chain))
 484         continue;
 485       MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
 486       ++NumLoadMoved;
 487       continue;
 488     }
 489
 490     if (!ISD::isNON_TRUNCStore(I))
 491       continue;
 492     SDValue Chain = I->getOperand(0);
 493
 494     if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
 495       continue;
 496
 497     SDValue N1 = I->getOperand(1);
 498     SDValue N2 = I->getOperand(2);
 499     if ((N1.getValueType().isFloatingPoint() &&
 500          !N1.getValueType().isVector()) ||
 501         !N1.hasOneUse())
 502       continue;
 503
 504     bool RModW = false;
 505     SDValue Load;
 506     unsigned Opcode = N1.getNode()->getOpcode();
 507     switch (Opcode) {
 508     case ISD::ADD:
 509     case ISD::MUL:
 510     case ISD::AND:
 511     case ISD::OR:
 512     case ISD::XOR:
 513     case ISD::ADDC:
 514     case ISD::ADDE:
 515     case ISD::VECTOR_SHUFFLE: {
 516       SDValue N10 = N1.getOperand(0);
 517       SDValue N11 = N1.getOperand(1);
 518       RModW = isRMWLoad(N10, Chain, N2, Load);
 519       if (!RModW)
 520         RModW = isRMWLoad(N11, Chain, N2, Load);
 521       break;
 522     }
 523     case ISD::SUB:
 524     case ISD::SHL:
 525     case ISD::SRA:
 526     case ISD::SRL:
 527     case ISD::ROTL:
 528     case ISD::ROTR:
 529     case ISD::SUBC:
 530     case ISD::SUBE:
 531     case X86ISD::SHLD:
 532     case X86ISD::SHRD: {
 533       SDValue N10 = N1.getOperand(0);
 534       RModW = isRMWLoad(N10, Chain, N2, Load);
 535       break;
 536     }
 537     }
 538
 539     if (RModW) {
 540       MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
 541       ++NumLoadMoved;
 542     }
 543   }
 544 }
 545
 546
 547 /// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
 548 /// nodes that target the FP stack to be store and load to the stack.  This is a
 549 /// gross hack.  We would like to simply mark these as being illegal, but when
 550 /// we do that, legalize produces these when it expands calls, then expands
 551 /// these in the same legalize pass.  We would like dag combine to be able to
 552 /// hack on these between the call expansion and the node legalization.  As such
 553 /// this pass basically does "really late" legalization of these inline with the
 554 /// X86 isel pass.
 555 void X86DAGToDAGISel::PreprocessForFPConvert() {
 556   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 557        E = CurDAG->allnodes_end(); I != E; ) {
 558     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 559     if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
 560       continue;
 561
 562     // If the source and destination are SSE registers, then this is a legal
 563     // conversion that should not be lowered.
 564     MVT SrcVT = N->getOperand(0).getValueType();
 565     MVT DstVT = N->getValueType(0);
 566     bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
 567     bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
 568     if (SrcIsSSE && DstIsSSE)
 569       continue;
 570
 571     if (!SrcIsSSE && !DstIsSSE) {
 572       // If this is an FPStack extension, it is a noop.
 573       if (N->getOpcode() == ISD::FP_EXTEND)
 574         continue;
 575       // If this is a value-preserving FPStack truncation, it is a noop.
 576       if (N->getConstantOperandVal(1))
 577         continue;
 578     }
 579
 580     // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
 581     // FPStack has extload and truncstore.  SSE can fold direct loads into other
 582     // operations.  Based on this, decide what we want to do.
 583     MVT MemVT;
 584     if (N->getOpcode() == ISD::FP_ROUND)
 585       MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
 586     else
 587       MemVT = SrcIsSSE ? SrcVT : DstVT;
 588
 589     SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
 590     DebugLoc dl = N->getDebugLoc();
 591
 592     // FIXME: optimize the case where the src/dest is a load or store?
 593     SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
 594                                           N->getOperand(0),
 595                                           MemTmp, NULL, 0, MemVT);
 596     SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
 597                                         NULL, 0, MemVT);
 598
 599     // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
 600     // extload we created.  This will cause general havok on the dag because
 601     // anything below the conversion could be folded into other existing nodes.
 602     // To avoid invalidating 'I', back it up to the convert node.
 603     --I;
 604     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
 605
 606     // Now that we did that, the node is dead.  Increment the iterator to the
 607     // next node to process, then delete N.
 608     ++I;
 609     CurDAG->DeleteNode(N);
 610   }
 611 }
 612
 613 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
 614 /// when it has created a SelectionDAG for us to codegen.
 615 void X86DAGToDAGISel::InstructionSelect() {
 616   CurBB = BB;  // BB can change as result of isel.
 617   const Function *F = CurDAG->getMachineFunction().getFunction();
 618   OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
 619
 620   DEBUG(BB->dump());
 621   if (OptLevel != CodeGenOpt::None)
 622     PreprocessForRMW();
 623
 624   // FIXME: This should only happen when not compiled with -O0.
 625   PreprocessForFPConvert();
 626
 627   // Codegen the basic block.
 628 #ifndef NDEBUG
 629   DOUT << "===== Instruction selection begins:\n";
 630   Indent = 0;
 631 #endif
 632   SelectRoot(*CurDAG);
 633 #ifndef NDEBUG
 634   DOUT << "===== Instruction selection ends:\n";
 635 #endif
 636
 637   CurDAG->RemoveDeadNodes();
 638 }
 639
 640 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
 641 /// the main function.
 642 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
 643                                              MachineFrameInfo *MFI) {
 644   const TargetInstrInfo *TII = TM.getInstrInfo();
 645   if (Subtarget->isTargetCygMing())
 646     BuildMI(BB, DebugLoc::getUnknownLoc(),
 647             TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
 648 }
 649
 650 void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
 651   // If this is main, emit special code for main.
 652   MachineBasicBlock *BB = MF.begin();
 653   if (Fn.hasExternalLinkage() && Fn.getName() == "main")
 654     EmitSpecialCodeForMain(BB, MF.getFrameInfo());
 655 }
 656
 657
 658 bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
 659                                               X86ISelAddressMode &AM) {
 660   assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
 661   SDValue Segment = N.getOperand(0);
 662
 663   if (AM.Segment.getNode() == 0) {
 664     AM.Segment = Segment;
 665     return false;
 666   }
 667
 668   return true;
 669 }
 670
 671 bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
 672   // This optimization is valid because the GNU TLS model defines that
 673   // gs:0 (or fs:0 on X86-64) contains its own address.
 674   // For more information see http://people.redhat.com/drepper/tls.pdf
 675
 676   SDValue Address = N.getOperand(1);
 677   if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
 678       !MatchSegmentBaseAddress (Address, AM))
 679     return false;
 680
 681   return true;
 682 }
 683
 684 bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
 685   bool SymbolicAddressesAreRIPRel =
 686     getTargetMachine().symbolicAddressesAreRIPRel();
 687   bool is64Bit = Subtarget->is64Bit();
 688   DOUT << "Wrapper: 64bit " << is64Bit;
 689   DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
 690
 691   // Under X86-64 non-small code model, GV (and friends) are 64-bits.
 692   if (is64Bit && (TM.getCodeModel() != CodeModel::Small))
 693     return true;
 694
 695   // Base and index reg must be 0 in order to use rip as base.
 696   bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode();
 697   if (is64Bit && !canUsePICRel && SymbolicAddressesAreRIPRel)
 698     return true;
 699
 700   if (AM.hasSymbolicDisplacement())
 701     return true;
 702   // If value is available in a register both base and index components have
 703   // been picked, we can't fit the result available in the register in the
 704   // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
 705
 706   SDValue N0 = N.getOperand(0);
 707   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
 708     uint64_t Offset = G->getOffset();
 709     if (!is64Bit || isInt32(AM.Disp + Offset)) {
 710       GlobalValue *GV = G->getGlobal();
 711       bool isRIPRel = SymbolicAddressesAreRIPRel;
 712       if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) {
 713         TLSModel::Model model =
 714           getTLSModel (GV, TM.getRelocationModel());
 715         if (is64Bit && model == TLSModel::InitialExec)
 716           isRIPRel = true;
 717       }
 718       AM.GV = GV;
 719       AM.Disp += Offset;
 720       AM.isRIPRel = isRIPRel;
 721       return false;
 722     }
 723   } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
 724     uint64_t Offset = CP->getOffset();
 725     if (!is64Bit || isInt32(AM.Disp + Offset)) {
 726       AM.CP = CP->getConstVal();
 727       AM.Align = CP->getAlignment();
 728       AM.Disp += Offset;
 729       AM.isRIPRel = SymbolicAddressesAreRIPRel;
 730       return false;
 731     }
 732   } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
 733     AM.ES = S->getSymbol();
 734     AM.isRIPRel = SymbolicAddressesAreRIPRel;
 735     return false;
 736   } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
 737     AM.JT = J->getIndex();
 738     AM.isRIPRel = SymbolicAddressesAreRIPRel;
 739     return false;
 740   }
 741
 742   return true;
 743 }
 744
 745 /// MatchAddress - Add the specified node to the specified addressing mode,
 746 /// returning true if it cannot be done.  This just pattern matches for the
 747 /// addressing mode.
 748 bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
 749                                    unsigned Depth) {
 750   bool is64Bit = Subtarget->is64Bit();
 751   DebugLoc dl = N.getDebugLoc();
 752   DOUT << "MatchAddress: "; DEBUG(AM.dump());
 753   // Limit recursion.
 754   if (Depth > 5)
 755     return MatchAddressBase(N, AM);
 756
 757   // RIP relative addressing: %rip + 32-bit displacement!
 758   if (AM.isRIPRel) {
 759     if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
 760       uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
 761       if (!is64Bit || isInt32(AM.Disp + Val)) {
 762         AM.Disp += Val;
 763         return false;
 764       }
 765     }
 766     return true;
 767   }
 768
 769   switch (N.getOpcode()) {
 770   default: break;
 771   case ISD::Constant: {
 772     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
 773     if (!is64Bit || isInt32(AM.Disp + Val)) {
 774       AM.Disp += Val;
 775       return false;
 776     }
 777     break;
 778   }
 779
 780   case X86ISD::SegmentBaseAddress:
 781     if (!MatchSegmentBaseAddress(N, AM))
 782       return false;
 783     break;
 784
 785   case X86ISD::Wrapper:
 786     if (!MatchWrapper(N, AM))
 787       return false;
 788     break;
 789
 790   case ISD::LOAD:
 791     if (!MatchLoad(N, AM))
 792       return false;
 793     break;
 794
 795   case ISD::FrameIndex:
 796     if (AM.BaseType == X86ISelAddressMode::RegBase
 797         && AM.Base.Reg.getNode() == 0) {
 798       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
 799       AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
 800       return false;
 801     }
 802     break;
 803
 804   case ISD::SHL:
 805     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
 806       break;
 807
 808     if (ConstantSDNode
 809           *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
 810       unsigned Val = CN->getZExtValue();
 811       if (Val == 1 || Val == 2 || Val == 3) {
 812         AM.Scale = 1 << Val;
 813         SDValue ShVal = N.getNode()->getOperand(0);
 814
 815         // Okay, we know that we have a scale by now.  However, if the scaled
 816         // value is an add of something and a constant, we can fold the
 817         // constant into the disp field here.
 818         if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
 819             isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
 820           AM.IndexReg = ShVal.getNode()->getOperand(0);
 821           ConstantSDNode *AddVal =
 822             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
 823           uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
 824           if (!is64Bit || isInt32(Disp))
 825             AM.Disp = Disp;
 826           else
 827             AM.IndexReg = ShVal;
 828         } else {
 829           AM.IndexReg = ShVal;
 830         }
 831         return false;
 832       }
 833     break;
 834     }
 835
 836   case ISD::SMUL_LOHI:
 837   case ISD::UMUL_LOHI:
 838     // A mul_lohi where we need the low part can be folded as a plain multiply.
 839     if (N.getResNo() != 0) break;
 840     // FALL THROUGH
 841   case ISD::MUL:
 842   case X86ISD::MUL_IMM:
 843     // X*[3,5,9] -> X+X*[2,4,8]
 844     if (AM.BaseType == X86ISelAddressMode::RegBase &&
 845         AM.Base.Reg.getNode() == 0 &&
 846         AM.IndexReg.getNode() == 0 &&
 847         !AM.isRIPRel) {
 848       if (ConstantSDNode
 849             *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
 850         if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
 851             CN->getZExtValue() == 9) {
 852           AM.Scale = unsigned(CN->getZExtValue())-1;
 853
 854           SDValue MulVal = N.getNode()->getOperand(0);
 855           SDValue Reg;
 856
 857           // Okay, we know that we have a scale by now.  However, if the scaled
 858           // value is an add of something and a constant, we can fold the
 859           // constant into the disp field here.
 860           if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
 861               isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
 862             Reg = MulVal.getNode()->getOperand(0);
 863             ConstantSDNode *AddVal =
 864               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
 865             uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
 866                                       CN->getZExtValue();
 867             if (!is64Bit || isInt32(Disp))
 868               AM.Disp = Disp;
 869             else
 870               Reg = N.getNode()->getOperand(0);
 871           } else {
 872             Reg = N.getNode()->getOperand(0);
 873           }
 874
 875           AM.IndexReg = AM.Base.Reg = Reg;
 876           return false;
 877         }
 878     }
 879     break;
 880
 881   case ISD::SUB: {
 882     // Given A-B, if A can be completely folded into the address and
 883     // the index field with the index field unused, use -B as the index.
 884     // This is a win if a has multiple parts that can be folded into
 885     // the address. Also, this saves a mov if the base register has
 886     // other uses, since it avoids a two-address sub instruction, however
 887     // it costs an additional mov if the index register has other uses.
 888
 889     // Test if the LHS of the sub can be folded.
 890     X86ISelAddressMode Backup = AM;
 891     if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
 892       AM = Backup;
 893       break;
 894     }
 895     // Test if the index field is free for use.
 896     if (AM.IndexReg.getNode() || AM.isRIPRel) {
 897       AM = Backup;
 898       break;
 899     }
 900     int Cost = 0;
 901     SDValue RHS = N.getNode()->getOperand(1);
 902     // If the RHS involves a register with multiple uses, this
 903     // transformation incurs an extra mov, due to the neg instruction
 904     // clobbering its operand.
 905     if (!RHS.getNode()->hasOneUse() ||
 906         RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
 907         RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
 908         RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
 909         (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
 910          RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
 911       ++Cost;
 912     // If the base is a register with multiple uses, this
 913     // transformation may save a mov.
 914     if ((AM.BaseType == X86ISelAddressMode::RegBase &&
 915          AM.Base.Reg.getNode() &&
 916          !AM.Base.Reg.getNode()->hasOneUse()) ||
 917         AM.BaseType == X86ISelAddressMode::FrameIndexBase)
 918       --Cost;
 919     // If the folded LHS was interesting, this transformation saves
 920     // address arithmetic.
 921     if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
 922         ((AM.Disp != 0) && (Backup.Disp == 0)) +
 923         (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
 924       --Cost;
 925     // If it doesn't look like it may be an overall win, don't do it.
 926     if (Cost >= 0) {
 927       AM = Backup;
 928       break;
 929     }
 930
 931     // Ok, the transformation is legal and appears profitable. Go for it.
 932     SDValue Zero = CurDAG->getConstant(0, N.getValueType());
 933     SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
 934     AM.IndexReg = Neg;
 935     AM.Scale = 1;
 936
 937     // Insert the new nodes into the topological ordering.
 938     if (Zero.getNode()->getNodeId() == -1 ||
 939         Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
 940       CurDAG->RepositionNode(N.getNode(), Zero.getNode());
 941       Zero.getNode()->setNodeId(N.getNode()->getNodeId());
 942     }
 943     if (Neg.getNode()->getNodeId() == -1 ||
 944         Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
 945       CurDAG->RepositionNode(N.getNode(), Neg.getNode());
 946       Neg.getNode()->setNodeId(N.getNode()->getNodeId());
 947     }
 948     return false;
 949   }
 950
 951   case ISD::ADD: {
 952     X86ISelAddressMode Backup = AM;
 953     if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
 954         !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
 955       return false;
 956     AM = Backup;
 957     if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
 958         !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
 959       return false;
 960     AM = Backup;
 961
 962     // If we couldn't fold both operands into the address at the same time,
 963     // see if we can just put each operand into a register and fold at least
 964     // the add.
 965     if (AM.BaseType == X86ISelAddressMode::RegBase &&
 966         !AM.Base.Reg.getNode() &&
 967         !AM.IndexReg.getNode() &&
 968         !AM.isRIPRel) {
 969       AM.Base.Reg = N.getNode()->getOperand(0);
 970       AM.IndexReg = N.getNode()->getOperand(1);
 971       AM.Scale = 1;
 972       return false;
 973     }
 974     break;
 975   }
 976
 977   case ISD::OR:
 978     // Handle "X | C" as "X + C" iff X is known to have C bits clear.
 979     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 980       X86ISelAddressMode Backup = AM;
 981       uint64_t Offset = CN->getSExtValue();
 982       // Start with the LHS as an addr mode.
 983       if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
 984           // Address could not have picked a GV address for the displacement.
 985           AM.GV == NULL &&
 986           // On x86-64, the resultant disp must fit in 32-bits.
 987           (!is64Bit || isInt32(AM.Disp + Offset)) &&
 988           // Check to see if the LHS & C is zero.
 989           CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
 990         AM.Disp += Offset;
 991         return false;
 992       }
 993       AM = Backup;
 994     }
 995     break;
 996
 997   case ISD::AND: {
 998     // Perform some heroic transforms on an and of a constant-count shift
 999     // with a constant to enable use of the scaled offset field.
1000
1001     SDValue Shift = N.getOperand(0);
1002     if (Shift.getNumOperands() != 2) break;
1003
1004     // Scale must not be used already.
1005     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1006
1007     // Not when RIP is used as the base.
1008     if (AM.isRIPRel) break;
1009
1010     SDValue X = Shift.getOperand(0);
1011     ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
1012     ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
1013     if (!C1 || !C2) break;
1014
1015     // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1016     // allows us to convert the shift and and into an h-register extract and
1017     // a scaled index.
1018     if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
1019       unsigned ScaleLog = 8 - C1->getZExtValue();
1020       if (ScaleLog > 0 && ScaleLog < 4 &&
1021           C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
1022         SDValue Eight = CurDAG->getConstant(8, MVT::i8);
1023         SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
1024         SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1025                                       X, Eight);
1026         SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
1027                                       Srl, Mask);
1028         SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
1029         SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1030                                       And, ShlCount);
1031
1032         // Insert the new nodes into the topological ordering.
1033         if (Eight.getNode()->getNodeId() == -1 ||
1034             Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1035           CurDAG->RepositionNode(X.getNode(), Eight.getNode());
1036           Eight.getNode()->setNodeId(X.getNode()->getNodeId());
1037         }
1038         if (Mask.getNode()->getNodeId() == -1 ||
1039             Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1040           CurDAG->RepositionNode(X.getNode(), Mask.getNode());
1041           Mask.getNode()->setNodeId(X.getNode()->getNodeId());
1042         }
1043         if (Srl.getNode()->getNodeId() == -1 ||
1044             Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1045           CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
1046           Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
1047         }
1048         if (And.getNode()->getNodeId() == -1 ||
1049             And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1050           CurDAG->RepositionNode(N.getNode(), And.getNode());
1051           And.getNode()->setNodeId(N.getNode()->getNodeId());
1052         }
1053         if (ShlCount.getNode()->getNodeId() == -1 ||
1054             ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1055           CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
1056           ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
1057         }
1058         if (Shl.getNode()->getNodeId() == -1 ||
1059             Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1060           CurDAG->RepositionNode(N.getNode(), Shl.getNode());
1061           Shl.getNode()->setNodeId(N.getNode()->getNodeId());
1062         }
1063         CurDAG->ReplaceAllUsesWith(N, Shl);
1064         AM.IndexReg = And;
1065         AM.Scale = (1 << ScaleLog);
1066         return false;
1067       }
1068     }
1069
1070     // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1071     // allows us to fold the shift into this addressing mode.
1072     if (Shift.getOpcode() != ISD::SHL) break;
1073
1074     // Not likely to be profitable if either the AND or SHIFT node has more
1075     // than one use (unless all uses are for address computation). Besides,
1076     // isel mechanism requires their node ids to be reused.
1077     if (!N.hasOneUse() || !Shift.hasOneUse())
1078       break;
1079
1080     // Verify that the shift amount is something we can fold.
1081     unsigned ShiftCst = C1->getZExtValue();
1082     if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
1083       break;
1084
1085     // Get the new AND mask, this folds to a constant.
1086     SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1087                                          SDValue(C2, 0), SDValue(C1, 0));
1088     SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
1089                                      NewANDMask);
1090     SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1091                                        NewAND, SDValue(C1, 0));
1092
1093     // Insert the new nodes into the topological ordering.
1094     if (C1->getNodeId() > X.getNode()->getNodeId()) {
1095       CurDAG->RepositionNode(X.getNode(), C1);
1096       C1->setNodeId(X.getNode()->getNodeId());
1097     }
1098     if (NewANDMask.getNode()->getNodeId() == -1 ||
1099         NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1100       CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
1101       NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
1102     }
1103     if (NewAND.getNode()->getNodeId() == -1 ||
1104         NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1105       CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
1106       NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
1107     }
1108     if (NewSHIFT.getNode()->getNodeId() == -1 ||
1109         NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1110       CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
1111       NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
1112     }
1113
1114     CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
1115
1116     AM.Scale = 1 << ShiftCst;
1117     AM.IndexReg = NewAND;
1118     return false;
1119   }
1120   }
1121
1122   return MatchAddressBase(N, AM);
1123 }
1124
1125 /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1126 /// specified addressing mode without any further recursion.
1127 bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1128   // Is the base register already occupied?
1129   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
1130     // If so, check to see if the scale index register is set.
1131     if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
1132       AM.IndexReg = N;
1133       AM.Scale = 1;
1134       return false;
1135     }
1136
1137     // Otherwise, we cannot select it.
1138     return true;
1139   }
1140
1141   // Default, generate it as a register.
1142   AM.BaseType = X86ISelAddressMode::RegBase;
1143   AM.Base.Reg = N;
1144   return false;
1145 }
1146
1147 /// SelectAddr - returns true if it is able pattern match an addressing mode.
1148 /// It returns the operands which make up the maximal addressing mode it can
1149 /// match by reference.
1150 bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
1151                                  SDValue &Scale, SDValue &Index,
1152                                  SDValue &Disp, SDValue &Segment) {
1153   X86ISelAddressMode AM;
1154   bool Done = false;
1155   if (AvoidDupAddrCompute && !N.hasOneUse()) {
1156     unsigned Opcode = N.getOpcode();
1157     if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
1158         Opcode != X86ISD::Wrapper) {
1159       // If we are able to fold N into addressing mode, then we'll allow it even
1160       // if N has multiple uses. In general, addressing computation is used as
1161       // addresses by all of its uses. But watch out for CopyToReg uses, that
1162       // means the address computation is liveout. It will be computed by a LEA
1163       // so we want to avoid computing the address twice.
1164       for (SDNode::use_iterator UI = N.getNode()->use_begin(),
1165              UE = N.getNode()->use_end(); UI != UE; ++UI) {
1166         if (UI->getOpcode() == ISD::CopyToReg) {
1167           MatchAddressBase(N, AM);
1168           Done = true;
1169           break;
1170         }
1171       }
1172     }
1173   }
1174
1175   if (!Done && MatchAddress(N, AM))
1176     return false;
1177
1178   MVT VT = N.getValueType();
1179   if (AM.BaseType == X86ISelAddressMode::RegBase) {
1180     if (!AM.Base.Reg.getNode())
1181       AM.Base.Reg = CurDAG->getRegister(0, VT);
1182   }
1183
1184   if (!AM.IndexReg.getNode())
1185     AM.IndexReg = CurDAG->getRegister(0, VT);
1186
1187   getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1188   return true;
1189 }
1190
1191 /// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
1192 /// match a load whose top elements are either undef or zeros.  The load flavor
1193 /// is derived from the type of N, which is either v4f32 or v2f64.
1194 bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
1195                                           SDValue N, SDValue &Base,
1196                                           SDValue &Scale, SDValue &Index,
1197                                           SDValue &Disp, SDValue &Segment,
1198                                           SDValue &InChain,
1199                                           SDValue &OutChain) {
1200   if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1201     InChain = N.getOperand(0).getValue(1);
1202     if (ISD::isNON_EXTLoad(InChain.getNode()) &&
1203         InChain.getValue(0).hasOneUse() &&
1204         N.hasOneUse() &&
1205         IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
1206       LoadSDNode *LD = cast<LoadSDNode>(InChain);
1207       if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1208         return false;
1209       OutChain = LD->getChain();
1210       return true;
1211     }
1212   }
1213
1214   // Also handle the case where we explicitly require zeros in the top
1215   // elements.  This is a vector shuffle from the zero vector.
1216   if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1217       // Check to see if the top elements are all zeros (or bitcast of zeros).
1218       N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1219       N.getOperand(0).getNode()->hasOneUse() &&
1220       ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
1221       N.getOperand(0).getOperand(0).hasOneUse()) {
1222     // Okay, this is a zero extending load.  Fold it.
1223     LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
1224     if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1225       return false;
1226     OutChain = LD->getChain();
1227     InChain = SDValue(LD, 1);
1228     return true;
1229   }
1230   return false;
1231 }
1232
1233
1234 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1235 /// mode it matches can be cost effectively emitted as an LEA instruction.
1236 bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
1237                                     SDValue &Base, SDValue &Scale,
1238                                     SDValue &Index, SDValue &Disp) {
1239   X86ISelAddressMode AM;
1240
1241   // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1242   // segments.
1243   SDValue Copy = AM.Segment;
1244   SDValue T = CurDAG->getRegister(0, MVT::i32);
1245   AM.Segment = T;
1246   if (MatchAddress(N, AM))
1247     return false;
1248   assert (T == AM.Segment);
1249   AM.Segment = Copy;
1250
1251   MVT VT = N.getValueType();
1252   unsigned Complexity = 0;
1253   if (AM.BaseType == X86ISelAddressMode::RegBase)
1254     if (AM.Base.Reg.getNode())
1255       Complexity = 1;
1256     else
1257       AM.Base.Reg = CurDAG->getRegister(0, VT);
1258   else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1259     Complexity = 4;
1260
1261   if (AM.IndexReg.getNode())
1262     Complexity++;
1263   else
1264     AM.IndexReg = CurDAG->getRegister(0, VT);
1265
1266   // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1267   // a simple shift.
1268   if (AM.Scale > 1)
1269     Complexity++;
1270
1271   // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1272   // to a LEA. This is determined with some expermentation but is by no means
1273   // optimal (especially for code size consideration). LEA is nice because of
1274   // its three-address nature. Tweak the cost function again when we can run
1275   // convertToThreeAddress() at register allocation time.
1276   if (AM.hasSymbolicDisplacement()) {
1277     // For X86-64, we should always use lea to materialize RIP relative
1278     // addresses.
1279     if (Subtarget->is64Bit())
1280       Complexity = 4;
1281     else
1282       Complexity += 2;
1283   }
1284
1285   if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
1286     Complexity++;
1287
1288   if (Complexity > 2) {
1289     SDValue Segment;
1290     getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1291     return true;
1292   }
1293   return false;
1294 }
1295
1296 bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
1297                                   SDValue &Base, SDValue &Scale,
1298                                   SDValue &Index, SDValue &Disp,
1299                                   SDValue &Segment) {
1300   if (ISD::isNON_EXTLoad(N.getNode()) &&
1301       N.hasOneUse() &&
1302       IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
1303     return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
1304   return false;
1305 }
1306
1307 /// getGlobalBaseReg - Return an SDNode that returns the value of
1308 /// the global base register. Output instructions required to
1309 /// initialize the global base register, if necessary.
1310 ///
1311 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1312   MachineFunction *MF = CurBB->getParent();
1313   unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1314   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1315 }
1316
1317 static SDNode *FindCallStartFromCall(SDNode *Node) {
1318   if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
1319     assert(Node->getOperand(0).getValueType() == MVT::Other &&
1320          "Node doesn't have a token chain argument!");
1321   return FindCallStartFromCall(Node->getOperand(0).getNode());
1322 }
1323
1324 SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
1325   SDValue Chain = Node->getOperand(0);
1326   SDValue In1 = Node->getOperand(1);
1327   SDValue In2L = Node->getOperand(2);
1328   SDValue In2H = Node->getOperand(3);
1329   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1330   if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1331     return NULL;
1332   SDValue LSI = Node->getOperand(4);    // MemOperand
1333   const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
1334   return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
1335                                MVT::i32, MVT::i32, MVT::Other, Ops,
1336                                array_lengthof(Ops));
1337 }
1338
1339 SDNode *X86DAGToDAGISel::Select(SDValue N) {
1340   SDNode *Node = N.getNode();
1341   MVT NVT = Node->getValueType(0);
1342   unsigned Opc, MOpc;
1343   unsigned Opcode = Node->getOpcode();
1344   DebugLoc dl = Node->getDebugLoc();
1345
1346 #ifndef NDEBUG
1347   DOUT << std::string(Indent, ' ') << "Selecting: ";
1348   DEBUG(Node->dump(CurDAG));
1349   DOUT << "\n";
1350   Indent += 2;
1351 #endif
1352
1353   if (Node->isMachineOpcode()) {
1354 #ifndef NDEBUG
1355     DOUT << std::string(Indent-2, ' ') << "== ";
1356     DEBUG(Node->dump(CurDAG));
1357     DOUT << "\n";
1358     Indent -= 2;
1359 #endif
1360     return NULL;   // Already selected.
1361   }
1362
1363   switch (Opcode) {
1364     default: break;
1365     case X86ISD::GlobalBaseReg:
1366       return getGlobalBaseReg();
1367
1368     case X86ISD::ATOMOR64_DAG:
1369       return SelectAtomic64(Node, X86::ATOMOR6432);
1370     case X86ISD::ATOMXOR64_DAG:
1371       return SelectAtomic64(Node, X86::ATOMXOR6432);
1372     case X86ISD::ATOMADD64_DAG:
1373       return SelectAtomic64(Node, X86::ATOMADD6432);
1374     case X86ISD::ATOMSUB64_DAG:
1375       return SelectAtomic64(Node, X86::ATOMSUB6432);
1376     case X86ISD::ATOMNAND64_DAG:
1377       return SelectAtomic64(Node, X86::ATOMNAND6432);
1378     case X86ISD::ATOMAND64_DAG:
1379       return SelectAtomic64(Node, X86::ATOMAND6432);
1380     case X86ISD::ATOMSWAP64_DAG:
1381       return SelectAtomic64(Node, X86::ATOMSWAP6432);
1382
1383     case ISD::SMUL_LOHI:
1384     case ISD::UMUL_LOHI: {
1385       SDValue N0 = Node->getOperand(0);
1386       SDValue N1 = Node->getOperand(1);
1387
1388       bool isSigned = Opcode == ISD::SMUL_LOHI;
1389       if (!isSigned)
1390         switch (NVT.getSimpleVT()) {
1391         default: assert(0 && "Unsupported VT!");
1392         case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
1393         case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1394         case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1395         case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1396         }
1397       else
1398         switch (NVT.getSimpleVT()) {
1399         default: assert(0 && "Unsupported VT!");
1400         case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
1401         case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1402         case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1403         case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1404         }
1405
1406       unsigned LoReg, HiReg;
1407       switch (NVT.getSimpleVT()) {
1408       default: assert(0 && "Unsupported VT!");
1409       case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
1410       case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
1411       case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1412       case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1413       }
1414
1415       SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1416       bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1417       // multiplty is commmutative
1418       if (!foldedLoad) {
1419         foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1420         if (foldedLoad)
1421           std::swap(N0, N1);
1422       }
1423
1424       SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1425                                               N0, SDValue()).getValue(1);
1426
1427       if (foldedLoad) {
1428         SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1429                           InFlag };
1430         SDNode *CNode =
1431           CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1432                                 array_lengthof(Ops));
1433         InFlag = SDValue(CNode, 1);
1434         // Update the chain.
1435         ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1436       } else {
1437         InFlag =
1438           SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1439       }
1440
1441       // Copy the low half of the result, if it is needed.
1442       if (!N.getValue(0).use_empty()) {
1443         SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1444                                                   LoReg, NVT, InFlag);
1445         InFlag = Result.getValue(2);
1446         ReplaceUses(N.getValue(0), Result);
1447 #ifndef NDEBUG
1448         DOUT << std::string(Indent-2, ' ') << "=> ";
1449         DEBUG(Result.getNode()->dump(CurDAG));
1450         DOUT << "\n";
1451 #endif
1452       }
1453       // Copy the high half of the result, if it is needed.
1454       if (!N.getValue(1).use_empty()) {
1455         SDValue Result;
1456         if (HiReg == X86::AH && Subtarget->is64Bit()) {
1457           // Prevent use of AH in a REX instruction by referencing AX instead.
1458           // Shift it down 8 bits.
1459           Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1460                                           X86::AX, MVT::i16, InFlag);
1461           InFlag = Result.getValue(2);
1462           Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1463                                                  Result,
1464                                      CurDAG->getTargetConstant(8, MVT::i8)), 0);
1465           // Then truncate it down to i8.
1466           SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1467           Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1468                                                    MVT::i8, Result, SRIdx), 0);
1469         } else {
1470           Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1471                                           HiReg, NVT, InFlag);
1472           InFlag = Result.getValue(2);
1473         }
1474         ReplaceUses(N.getValue(1), Result);
1475 #ifndef NDEBUG
1476         DOUT << std::string(Indent-2, ' ') << "=> ";
1477         DEBUG(Result.getNode()->dump(CurDAG));
1478         DOUT << "\n";
1479 #endif
1480       }
1481
1482 #ifndef NDEBUG
1483       Indent -= 2;
1484 #endif
1485
1486       return NULL;
1487     }
1488
1489     case ISD::SDIVREM:
1490     case ISD::UDIVREM: {
1491       SDValue N0 = Node->getOperand(0);
1492       SDValue N1 = Node->getOperand(1);
1493
1494       bool isSigned = Opcode == ISD::SDIVREM;
1495       if (!isSigned)
1496         switch (NVT.getSimpleVT()) {
1497         default: assert(0 && "Unsupported VT!");
1498         case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
1499         case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1500         case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1501         case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1502         }
1503       else
1504         switch (NVT.getSimpleVT()) {
1505         default: assert(0 && "Unsupported VT!");
1506         case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
1507         case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1508         case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1509         case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1510         }
1511
1512       unsigned LoReg, HiReg;
1513       unsigned ClrOpcode, SExtOpcode;
1514       switch (NVT.getSimpleVT()) {
1515       default: assert(0 && "Unsupported VT!");
1516       case MVT::i8:
1517         LoReg = X86::AL;  HiReg = X86::AH;
1518         ClrOpcode  = 0;
1519         SExtOpcode = X86::CBW;
1520         break;
1521       case MVT::i16:
1522         LoReg = X86::AX;  HiReg = X86::DX;
1523         ClrOpcode  = X86::MOV16r0;
1524         SExtOpcode = X86::CWD;
1525         break;
1526       case MVT::i32:
1527         LoReg = X86::EAX; HiReg = X86::EDX;
1528         ClrOpcode  = X86::MOV32r0;
1529         SExtOpcode = X86::CDQ;
1530         break;
1531       case MVT::i64:
1532         LoReg = X86::RAX; HiReg = X86::RDX;
1533         ClrOpcode  = X86::MOV64r0;
1534         SExtOpcode = X86::CQO;
1535         break;
1536       }
1537
1538       SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1539       bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1540       bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1541
1542       SDValue InFlag;
1543       if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
1544         // Special case for div8, just use a move with zero extension to AX to
1545         // clear the upper 8 bits (AH).
1546         SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
1547         if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1548           SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
1549           Move =
1550             SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
1551                                           MVT::Other, Ops,
1552                                           array_lengthof(Ops)), 0);
1553           Chain = Move.getValue(1);
1554           ReplaceUses(N0.getValue(1), Chain);
1555         } else {
1556           Move =
1557             SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1558           Chain = CurDAG->getEntryNode();
1559         }
1560         Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1561         InFlag = Chain.getValue(1);
1562       } else {
1563         InFlag =
1564           CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1565                                LoReg, N0, SDValue()).getValue(1);
1566         if (isSigned && !signBitIsZero) {
1567           // Sign extend the low part into the high part.
1568           InFlag =
1569             SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1570         } else {
1571           // Zero out the high part, effectively zero extending the input.
1572           SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
1573                                     0);
1574           InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
1575                                         ClrNode, InFlag).getValue(1);
1576         }
1577       }
1578
1579       if (foldedLoad) {
1580         SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1581                           InFlag };
1582         SDNode *CNode =
1583           CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1584                                 array_lengthof(Ops));
1585         InFlag = SDValue(CNode, 1);
1586         // Update the chain.
1587         ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1588       } else {
1589         InFlag =
1590           SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1591       }
1592
1593       // Copy the division (low) result, if it is needed.
1594       if (!N.getValue(0).use_empty()) {
1595         SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1596                                                   LoReg, NVT, InFlag);
1597         InFlag = Result.getValue(2);
1598         ReplaceUses(N.getValue(0), Result);
1599 #ifndef NDEBUG
1600         DOUT << std::string(Indent-2, ' ') << "=> ";
1601         DEBUG(Result.getNode()->dump(CurDAG));
1602         DOUT << "\n";
1603 #endif
1604       }
1605       // Copy the remainder (high) result, if it is needed.
1606       if (!N.getValue(1).use_empty()) {
1607         SDValue Result;
1608         if (HiReg == X86::AH && Subtarget->is64Bit()) {
1609           // Prevent use of AH in a REX instruction by referencing AX instead.
1610           // Shift it down 8 bits.
1611           Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1612                                           X86::AX, MVT::i16, InFlag);
1613           InFlag = Result.getValue(2);
1614           Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1615                                         Result,
1616                                         CurDAG->getTargetConstant(8, MVT::i8)),
1617                            0);
1618           // Then truncate it down to i8.
1619           SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1620           Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1621                                                    MVT::i8, Result, SRIdx), 0);
1622         } else {
1623           Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1624                                           HiReg, NVT, InFlag);
1625           InFlag = Result.getValue(2);
1626         }
1627         ReplaceUses(N.getValue(1), Result);
1628 #ifndef NDEBUG
1629         DOUT << std::string(Indent-2, ' ') << "=> ";
1630         DEBUG(Result.getNode()->dump(CurDAG));
1631         DOUT << "\n";
1632 #endif
1633       }
1634
1635 #ifndef NDEBUG
1636       Indent -= 2;
1637 #endif
1638
1639       return NULL;
1640     }
1641
1642     case ISD::DECLARE: {
1643       // Handle DECLARE nodes here because the second operand may have been
1644       // wrapped in X86ISD::Wrapper.
1645       SDValue Chain = Node->getOperand(0);
1646       SDValue N1 = Node->getOperand(1);
1647       SDValue N2 = Node->getOperand(2);
1648       FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
1649
1650       // FIXME: We need to handle this for VLAs.
1651       if (!FINode) {
1652         ReplaceUses(N.getValue(0), Chain);
1653         return NULL;
1654       }
1655
1656       if (N2.getOpcode() == ISD::ADD &&
1657           N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
1658         N2 = N2.getOperand(1);
1659
1660       // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
1661       // somehow, just ignore it.
1662       if (N2.getOpcode() != X86ISD::Wrapper) {
1663         ReplaceUses(N.getValue(0), Chain);
1664         return NULL;
1665       }
1666       GlobalAddressSDNode *GVNode =
1667         dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
1668       if (GVNode == 0) {
1669         ReplaceUses(N.getValue(0), Chain);
1670         return NULL;
1671       }
1672       SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
1673                                                  TLI.getPointerTy());
1674       SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
1675                                                     TLI.getPointerTy());
1676       SDValue Ops[] = { Tmp1, Tmp2, Chain };
1677       return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
1678                                    MVT::Other, Ops,
1679                                    array_lengthof(Ops));
1680     }
1681   }
1682
1683   SDNode *ResNode = SelectCode(N);
1684
1685 #ifndef NDEBUG
1686   DOUT << std::string(Indent-2, ' ') << "=> ";
1687   if (ResNode == NULL || ResNode == N.getNode())
1688     DEBUG(N.getNode()->dump(CurDAG));
1689   else
1690     DEBUG(ResNode->dump(CurDAG));
1691   DOUT << "\n";
1692   Indent -= 2;
1693 #endif
1694
1695   return ResNode;
1696 }
1697
1698 bool X86DAGToDAGISel::
1699 SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
1700                              std::vector<SDValue> &OutOps) {
1701   SDValue Op0, Op1, Op2, Op3, Op4;
1702   switch (ConstraintCode) {
1703   case 'o':   // offsetable        ??
1704   case 'v':   // not offsetable    ??
1705   default: return true;
1706   case 'm':   // memory
1707     if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
1708       return true;
1709     break;
1710   }
1711
1712   OutOps.push_back(Op0);
1713   OutOps.push_back(Op1);
1714   OutOps.push_back(Op2);
1715   OutOps.push_back(Op3);
1716   OutOps.push_back(Op4);
1717   return false;
1718 }
1719
1720 /// createX86ISelDag - This pass converts a legalized DAG into a
1721 /// X86-specific DAG, ready for instruction scheduling.
1722 ///
1723 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
1724                                      llvm::CodeGenOpt::Level OptLevel) {
1725   return new X86DAGToDAGISel(TM, OptLevel);
1726 }