lib/Target/X86/X86ISelDAGToDAG.cpp

   1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a DAG pattern matching instruction selector for X86,
  11 // converting from a legalized dag to a X86 dag.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "x86-isel"
  16 #include "X86.h"
  17 #include "X86InstrBuilder.h"
  18 #include "X86ISelLowering.h"
  19 #include "X86MachineFunctionInfo.h"
  20 #include "X86RegisterInfo.h"
  21 #include "X86Subtarget.h"
  22 #include "X86TargetMachine.h"
  23 #include "llvm/GlobalValue.h"
  24 #include "llvm/Instructions.h"
  25 #include "llvm/Intrinsics.h"
  26 #include "llvm/Support/CFG.h"
  27 #include "llvm/Type.h"
  28 #include "llvm/CodeGen/MachineConstantPool.h"
  29 #include "llvm/CodeGen/MachineFunction.h"
  30 #include "llvm/CodeGen/MachineFrameInfo.h"
  31 #include "llvm/CodeGen/MachineInstrBuilder.h"
  32 #include "llvm/CodeGen/MachineRegisterInfo.h"
  33 #include "llvm/CodeGen/SelectionDAGISel.h"
  34 #include "llvm/Target/TargetMachine.h"
  35 #include "llvm/Target/TargetOptions.h"
  36 #include "llvm/Support/Compiler.h"
  37 #include "llvm/Support/Debug.h"
  38 #include "llvm/Support/ErrorHandling.h"
  39 #include "llvm/Support/MathExtras.h"
  40 #include "llvm/Support/Streams.h"
  41 #include "llvm/Support/raw_ostream.h"
  42 #include "llvm/ADT/SmallPtrSet.h"
  43 #include "llvm/ADT/Statistic.h"
  44 using namespace llvm;
  45
  46 #include "llvm/Support/CommandLine.h"
  47 static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
  48
  49 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
  50
  51 //===----------------------------------------------------------------------===//
  52 //                      Pattern Matcher Implementation
  53 //===----------------------------------------------------------------------===//
  54
  55 namespace {
  56   /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  57   /// SDValue's instead of register numbers for the leaves of the matched
  58   /// tree.
  59   struct X86ISelAddressMode {
  60     enum {
  61       RegBase,
  62       FrameIndexBase
  63     } BaseType;
  64
  65     struct {            // This is really a union, discriminated by BaseType!
  66       SDValue Reg;
  67       int FrameIndex;
  68     } Base;
  69
  70     unsigned Scale;
  71     SDValue IndexReg;
  72     int32_t Disp;
  73     SDValue Segment;
  74     GlobalValue *GV;
  75     Constant *CP;
  76     const char *ES;
  77     int JT;
  78     unsigned Align;    // CP alignment.
  79     unsigned char SymbolFlags;  // X86II::MO_*
  80
  81     X86ISelAddressMode()
  82       : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
  83         Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
  84     }
  85
  86     bool hasSymbolicDisplacement() const {
  87       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
  88     }
  89
  90     bool hasBaseOrIndexReg() const {
  91       return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
  92     }
  93
  94     /// isRIPRelative - Return true if this addressing mode is already RIP
  95     /// relative.
  96     bool isRIPRelative() const {
  97       if (BaseType != RegBase) return false;
  98       if (RegisterSDNode *RegNode =
  99             dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
 100         return RegNode->getReg() == X86::RIP;
 101       return false;
 102     }
 103
 104     void setBaseReg(SDValue Reg) {
 105       BaseType = RegBase;
 106       Base.Reg = Reg;
 107     }
 108
 109     void dump() {
 110       cerr << "X86ISelAddressMode " << this << "\n";
 111       cerr << "Base.Reg ";
 112               if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
 113               else cerr << "nul";
 114       cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
 115       cerr << " Scale" << Scale << "\n";
 116       cerr << "IndexReg ";
 117               if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
 118               else cerr << "nul";
 119       cerr << " Disp " << Disp << "\n";
 120       cerr << "GV "; if (GV) GV->dump();
 121                      else cerr << "nul";
 122       cerr << " CP "; if (CP) CP->dump();
 123                      else cerr << "nul";
 124       cerr << "\n";
 125       cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
 126       cerr  << " JT" << JT << " Align" << Align << "\n";
 127     }
 128   };
 129 }
 130
 131 namespace {
 132   //===--------------------------------------------------------------------===//
 133   /// ISel - X86 specific code to select X86 machine instructions for
 134   /// SelectionDAG operations.
 135   ///
 136   class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
 137     /// X86Lowering - This object fully describes how to lower LLVM code to an
 138     /// X86-specific SelectionDAG.
 139     X86TargetLowering &X86Lowering;
 140
 141     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
 142     /// make the right decision when generating code for different targets.
 143     const X86Subtarget *Subtarget;
 144
 145     /// OptForSize - If true, selector should try to optimize for code size
 146     /// instead of performance.
 147     bool OptForSize;
 148
 149   public:
 150     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
 151       : SelectionDAGISel(tm, OptLevel),
 152         X86Lowering(*tm.getTargetLowering()),
 153         Subtarget(&tm.getSubtarget<X86Subtarget>()),
 154         OptForSize(false) {}
 155
 156     virtual const char *getPassName() const {
 157       return "X86 DAG->DAG Instruction Selection";
 158     }
 159
 160     /// InstructionSelect - This callback is invoked by
 161     /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
 162     virtual void InstructionSelect();
 163
 164     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
 165
 166     virtual
 167       bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
 168
 169 // Include the pieces autogenerated from the target description.
 170 #include "X86GenDAGISel.inc"
 171
 172   private:
 173     SDNode *Select(SDValue N);
 174     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
 175     SDNode *SelectAtomicLoadAdd(SDNode *Node, MVT NVT);
 176
 177     bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
 178     bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
 179     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
 180     bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
 181     bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 182                                  unsigned Depth);
 183     bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
 184     bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
 185                     SDValue &Scale, SDValue &Index, SDValue &Disp,
 186                     SDValue &Segment);
 187     bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
 188                        SDValue &Scale, SDValue &Index, SDValue &Disp);
 189     bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
 190                        SDValue &Scale, SDValue &Index, SDValue &Disp);
 191     bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
 192                              SDValue N, SDValue &Base, SDValue &Scale,
 193                              SDValue &Index, SDValue &Disp,
 194                              SDValue &Segment,
 195                              SDValue &InChain, SDValue &OutChain);
 196     bool TryFoldLoad(SDValue P, SDValue N,
 197                      SDValue &Base, SDValue &Scale,
 198                      SDValue &Index, SDValue &Disp,
 199                      SDValue &Segment);
 200     void PreprocessForRMW();
 201     void PreprocessForFPConvert();
 202
 203     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 204     /// inline asm expressions.
 205     virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
 206                                               char ConstraintCode,
 207                                               std::vector<SDValue> &OutOps);
 208
 209     void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
 210
 211     inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
 212                                    SDValue &Scale, SDValue &Index,
 213                                    SDValue &Disp, SDValue &Segment) {
 214       Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
 215         CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
 216         AM.Base.Reg;
 217       Scale = getI8Imm(AM.Scale);
 218       Index = AM.IndexReg;
 219       // These are 32-bit even in 64-bit mode since RIP relative offset
 220       // is 32-bit.
 221       if (AM.GV)
 222         Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
 223                                               AM.SymbolFlags);
 224       else if (AM.CP)
 225         Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
 226                                              AM.Align, AM.Disp, AM.SymbolFlags);
 227       else if (AM.ES)
 228         Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
 229       else if (AM.JT != -1)
 230         Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
 231       else
 232         Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
 233
 234       if (AM.Segment.getNode())
 235         Segment = AM.Segment;
 236       else
 237         Segment = CurDAG->getRegister(0, MVT::i32);
 238     }
 239
 240     /// getI8Imm - Return a target constant with the specified value, of type
 241     /// i8.
 242     inline SDValue getI8Imm(unsigned Imm) {
 243       return CurDAG->getTargetConstant(Imm, MVT::i8);
 244     }
 245
 246     /// getI16Imm - Return a target constant with the specified value, of type
 247     /// i16.
 248     inline SDValue getI16Imm(unsigned Imm) {
 249       return CurDAG->getTargetConstant(Imm, MVT::i16);
 250     }
 251
 252     /// getI32Imm - Return a target constant with the specified value, of type
 253     /// i32.
 254     inline SDValue getI32Imm(unsigned Imm) {
 255       return CurDAG->getTargetConstant(Imm, MVT::i32);
 256     }
 257
 258     /// getGlobalBaseReg - Return an SDNode that returns the value of
 259     /// the global base register. Output instructions required to
 260     /// initialize the global base register, if necessary.
 261     ///
 262     SDNode *getGlobalBaseReg();
 263
 264     /// getTargetMachine - Return a reference to the TargetMachine, casted
 265     /// to the target-specific type.
 266     const X86TargetMachine &getTargetMachine() {
 267       return static_cast<const X86TargetMachine &>(TM);
 268     }
 269
 270     /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
 271     /// to the target-specific type.
 272     const X86InstrInfo *getInstrInfo() {
 273       return getTargetMachine().getInstrInfo();
 274     }
 275
 276 #ifndef NDEBUG
 277     unsigned Indent;
 278 #endif
 279   };
 280 }
 281
 282
 283 bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
 284                                                  SDNode *Root) const {
 285   if (OptLevel == CodeGenOpt::None) return false;
 286
 287   if (U == Root)
 288     switch (U->getOpcode()) {
 289     default: break;
 290     case ISD::ADD:
 291     case ISD::ADDC:
 292     case ISD::ADDE:
 293     case ISD::AND:
 294     case ISD::OR:
 295     case ISD::XOR: {
 296       SDValue Op1 = U->getOperand(1);
 297
 298       // If the other operand is a 8-bit immediate we should fold the immediate
 299       // instead. This reduces code size.
 300       // e.g.
 301       // movl 4(%esp), %eax
 302       // addl $4, %eax
 303       // vs.
 304       // movl $4, %eax
 305       // addl 4(%esp), %eax
 306       // The former is 2 bytes shorter. In case where the increment is 1, then
 307       // the saving can be 4 bytes (by using incl %eax).
 308       if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
 309         if (Imm->getAPIntValue().isSignedIntN(8))
 310           return false;
 311
 312       // If the other operand is a TLS address, we should fold it instead.
 313       // This produces
 314       // movl    %gs:0, %eax
 315       // leal    i@NTPOFF(%eax), %eax
 316       // instead of
 317       // movl    $i@NTPOFF, %eax
 318       // addl    %gs:0, %eax
 319       // if the block also has an access to a second TLS address this will save
 320       // a load.
 321       // FIXME: This is probably also true for non TLS addresses.
 322       if (Op1.getOpcode() == X86ISD::Wrapper) {
 323         SDValue Val = Op1.getOperand(0);
 324         if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
 325           return false;
 326       }
 327     }
 328     }
 329
 330   // Proceed to 'generic' cycle finder code
 331   return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
 332 }
 333
 334 /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
 335 /// and move load below the TokenFactor. Replace store's chain operand with
 336 /// load's chain result.
 337 static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
 338                                  SDValue Store, SDValue TF) {
 339   SmallVector<SDValue, 4> Ops;
 340   for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
 341     if (Load.getNode() == TF.getOperand(i).getNode())
 342       Ops.push_back(Load.getOperand(0));
 343     else
 344       Ops.push_back(TF.getOperand(i));
 345   SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
 346   SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
 347                                                Load.getOperand(1),
 348                                                Load.getOperand(2));
 349   CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
 350                              Store.getOperand(2), Store.getOperand(3));
 351 }
 352
 353 /// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
 354 ///
 355 static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
 356                       SDValue &Load) {
 357   if (N.getOpcode() == ISD::BIT_CONVERT)
 358     N = N.getOperand(0);
 359
 360   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
 361   if (!LD || LD->isVolatile())
 362     return false;
 363   if (LD->getAddressingMode() != ISD::UNINDEXED)
 364     return false;
 365
 366   ISD::LoadExtType ExtType = LD->getExtensionType();
 367   if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
 368     return false;
 369
 370   if (N.hasOneUse() &&
 371       N.getOperand(1) == Address &&
 372       N.getNode()->isOperandOf(Chain.getNode())) {
 373     Load = N;
 374     return true;
 375   }
 376   return false;
 377 }
 378
 379 /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
 380 /// operand and move load below the call's chain operand.
 381 static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
 382                                   SDValue Call, SDValue CallSeqStart) {
 383   SmallVector<SDValue, 8> Ops;
 384   SDValue Chain = CallSeqStart.getOperand(0);
 385   if (Chain.getNode() == Load.getNode())
 386     Ops.push_back(Load.getOperand(0));
 387   else {
 388     assert(Chain.getOpcode() == ISD::TokenFactor &&
 389            "Unexpected CallSeqStart chain operand");
 390     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
 391       if (Chain.getOperand(i).getNode() == Load.getNode())
 392         Ops.push_back(Load.getOperand(0));
 393       else
 394         Ops.push_back(Chain.getOperand(i));
 395     SDValue NewChain =
 396       CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
 397                       MVT::Other, &Ops[0], Ops.size());
 398     Ops.clear();
 399     Ops.push_back(NewChain);
 400   }
 401   for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
 402     Ops.push_back(CallSeqStart.getOperand(i));
 403   CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
 404   CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
 405                              Load.getOperand(1), Load.getOperand(2));
 406   Ops.clear();
 407   Ops.push_back(SDValue(Load.getNode(), 1));
 408   for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
 409     Ops.push_back(Call.getOperand(i));
 410   CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
 411 }
 412
 413 /// isCalleeLoad - Return true if call address is a load and it can be
 414 /// moved below CALLSEQ_START and the chains leading up to the call.
 415 /// Return the CALLSEQ_START by reference as a second output.
 416 static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
 417   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
 418     return false;
 419   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
 420   if (!LD ||
 421       LD->isVolatile() ||
 422       LD->getAddressingMode() != ISD::UNINDEXED ||
 423       LD->getExtensionType() != ISD::NON_EXTLOAD)
 424     return false;
 425
 426   // Now let's find the callseq_start.
 427   while (Chain.getOpcode() != ISD::CALLSEQ_START) {
 428     if (!Chain.hasOneUse())
 429       return false;
 430     Chain = Chain.getOperand(0);
 431   }
 432
 433   if (Chain.getOperand(0).getNode() == Callee.getNode())
 434     return true;
 435   if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
 436       Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
 437     return true;
 438   return false;
 439 }
 440
 441
 442 /// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
 443 /// This is only run if not in -O0 mode.
 444 /// This allows the instruction selector to pick more read-modify-write
 445 /// instructions. This is a common case:
 446 ///
 447 ///     [Load chain]
 448 ///         ^
 449 ///         |
 450 ///       [Load]
 451 ///       ^    ^
 452 ///       |    |
 453 ///      /      \-
 454 ///     /         |
 455 /// [TokenFactor] [Op]
 456 ///     ^          ^
 457 ///     |          |
 458 ///      \        /
 459 ///       \      /
 460 ///       [Store]
 461 ///
 462 /// The fact the store's chain operand != load's chain will prevent the
 463 /// (store (op (load))) instruction from being selected. We can transform it to:
 464 ///
 465 ///     [Load chain]
 466 ///         ^
 467 ///         |
 468 ///    [TokenFactor]
 469 ///         ^
 470 ///         |
 471 ///       [Load]
 472 ///       ^    ^
 473 ///       |    |
 474 ///       |     \-
 475 ///       |       |
 476 ///       |     [Op]
 477 ///       |       ^
 478 ///       |       |
 479 ///       \      /
 480 ///        \    /
 481 ///       [Store]
 482 void X86DAGToDAGISel::PreprocessForRMW() {
 483   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 484          E = CurDAG->allnodes_end(); I != E; ++I) {
 485     if (I->getOpcode() == X86ISD::CALL) {
 486       /// Also try moving call address load from outside callseq_start to just
 487       /// before the call to allow it to be folded.
 488       ///
 489       ///     [Load chain]
 490       ///         ^
 491       ///         |
 492       ///       [Load]
 493       ///       ^    ^
 494       ///       |    |
 495       ///      /      \--
 496       ///     /          |
 497       ///[CALLSEQ_START] |
 498       ///     ^          |
 499       ///     |          |
 500       /// [LOAD/C2Reg]   |
 501       ///     |          |
 502       ///      \        /
 503       ///       \      /
 504       ///       [CALL]
 505       SDValue Chain = I->getOperand(0);
 506       SDValue Load  = I->getOperand(1);
 507       if (!isCalleeLoad(Load, Chain))
 508         continue;
 509       MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
 510       ++NumLoadMoved;
 511       continue;
 512     }
 513
 514     if (!ISD::isNON_TRUNCStore(I))
 515       continue;
 516     SDValue Chain = I->getOperand(0);
 517
 518     if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
 519       continue;
 520
 521     SDValue N1 = I->getOperand(1);
 522     SDValue N2 = I->getOperand(2);
 523     if ((N1.getValueType().isFloatingPoint() &&
 524          !N1.getValueType().isVector()) ||
 525         !N1.hasOneUse())
 526       continue;
 527
 528     bool RModW = false;
 529     SDValue Load;
 530     unsigned Opcode = N1.getNode()->getOpcode();
 531     switch (Opcode) {
 532     case ISD::ADD:
 533     case ISD::MUL:
 534     case ISD::AND:
 535     case ISD::OR:
 536     case ISD::XOR:
 537     case ISD::ADDC:
 538     case ISD::ADDE:
 539     case ISD::VECTOR_SHUFFLE: {
 540       SDValue N10 = N1.getOperand(0);
 541       SDValue N11 = N1.getOperand(1);
 542       RModW = isRMWLoad(N10, Chain, N2, Load);
 543       if (!RModW)
 544         RModW = isRMWLoad(N11, Chain, N2, Load);
 545       break;
 546     }
 547     case ISD::SUB:
 548     case ISD::SHL:
 549     case ISD::SRA:
 550     case ISD::SRL:
 551     case ISD::ROTL:
 552     case ISD::ROTR:
 553     case ISD::SUBC:
 554     case ISD::SUBE:
 555     case X86ISD::SHLD:
 556     case X86ISD::SHRD: {
 557       SDValue N10 = N1.getOperand(0);
 558       RModW = isRMWLoad(N10, Chain, N2, Load);
 559       break;
 560     }
 561     }
 562
 563     if (RModW) {
 564       MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
 565       ++NumLoadMoved;
 566     }
 567   }
 568 }
 569
 570
 571 /// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
 572 /// nodes that target the FP stack to be store and load to the stack.  This is a
 573 /// gross hack.  We would like to simply mark these as being illegal, but when
 574 /// we do that, legalize produces these when it expands calls, then expands
 575 /// these in the same legalize pass.  We would like dag combine to be able to
 576 /// hack on these between the call expansion and the node legalization.  As such
 577 /// this pass basically does "really late" legalization of these inline with the
 578 /// X86 isel pass.
 579 void X86DAGToDAGISel::PreprocessForFPConvert() {
 580   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 581        E = CurDAG->allnodes_end(); I != E; ) {
 582     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 583     if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
 584       continue;
 585
 586     // If the source and destination are SSE registers, then this is a legal
 587     // conversion that should not be lowered.
 588     MVT SrcVT = N->getOperand(0).getValueType();
 589     MVT DstVT = N->getValueType(0);
 590     bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
 591     bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
 592     if (SrcIsSSE && DstIsSSE)
 593       continue;
 594
 595     if (!SrcIsSSE && !DstIsSSE) {
 596       // If this is an FPStack extension, it is a noop.
 597       if (N->getOpcode() == ISD::FP_EXTEND)
 598         continue;
 599       // If this is a value-preserving FPStack truncation, it is a noop.
 600       if (N->getConstantOperandVal(1))
 601         continue;
 602     }
 603
 604     // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
 605     // FPStack has extload and truncstore.  SSE can fold direct loads into other
 606     // operations.  Based on this, decide what we want to do.
 607     MVT MemVT;
 608     if (N->getOpcode() == ISD::FP_ROUND)
 609       MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
 610     else
 611       MemVT = SrcIsSSE ? SrcVT : DstVT;
 612
 613     SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
 614     DebugLoc dl = N->getDebugLoc();
 615
 616     // FIXME: optimize the case where the src/dest is a load or store?
 617     SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
 618                                           N->getOperand(0),
 619                                           MemTmp, NULL, 0, MemVT);
 620     SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
 621                                         NULL, 0, MemVT);
 622
 623     // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
 624     // extload we created.  This will cause general havok on the dag because
 625     // anything below the conversion could be folded into other existing nodes.
 626     // To avoid invalidating 'I', back it up to the convert node.
 627     --I;
 628     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
 629
 630     // Now that we did that, the node is dead.  Increment the iterator to the
 631     // next node to process, then delete N.
 632     ++I;
 633     CurDAG->DeleteNode(N);
 634   }
 635 }
 636
 637 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
 638 /// when it has created a SelectionDAG for us to codegen.
 639 void X86DAGToDAGISel::InstructionSelect() {
 640   const Function *F = MF->getFunction();
 641   OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
 642
 643   DEBUG(BB->dump());
 644   if (OptLevel != CodeGenOpt::None)
 645     PreprocessForRMW();
 646
 647   // FIXME: This should only happen when not compiled with -O0.
 648   PreprocessForFPConvert();
 649
 650   // Codegen the basic block.
 651 #ifndef NDEBUG
 652   DEBUG(errs() << "===== Instruction selection begins:\n");
 653   Indent = 0;
 654 #endif
 655   SelectRoot(*CurDAG);
 656 #ifndef NDEBUG
 657   DEBUG(errs() << "===== Instruction selection ends:\n");
 658 #endif
 659
 660   CurDAG->RemoveDeadNodes();
 661 }
 662
 663 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
 664 /// the main function.
 665 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
 666                                              MachineFrameInfo *MFI) {
 667   const TargetInstrInfo *TII = TM.getInstrInfo();
 668   if (Subtarget->isTargetCygMing())
 669     BuildMI(BB, DebugLoc::getUnknownLoc(),
 670             TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
 671 }
 672
 673 void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
 674   // If this is main, emit special code for main.
 675   MachineBasicBlock *BB = MF.begin();
 676   if (Fn.hasExternalLinkage() && Fn.getName() == "main")
 677     EmitSpecialCodeForMain(BB, MF.getFrameInfo());
 678 }
 679
 680
 681 bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
 682                                               X86ISelAddressMode &AM) {
 683   assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
 684   SDValue Segment = N.getOperand(0);
 685
 686   if (AM.Segment.getNode() == 0) {
 687     AM.Segment = Segment;
 688     return false;
 689   }
 690
 691   return true;
 692 }
 693
 694 bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
 695   // This optimization is valid because the GNU TLS model defines that
 696   // gs:0 (or fs:0 on X86-64) contains its own address.
 697   // For more information see http://people.redhat.com/drepper/tls.pdf
 698
 699   SDValue Address = N.getOperand(1);
 700   if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
 701       !MatchSegmentBaseAddress (Address, AM))
 702     return false;
 703
 704   return true;
 705 }
 706
 707 /// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
 708 /// into an addressing mode.  These wrap things that will resolve down into a
 709 /// symbol reference.  If no match is possible, this returns true, otherwise it
 710 /// returns false.
 711 bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
 712   // If the addressing mode already has a symbol as the displacement, we can
 713   // never match another symbol.
 714   if (AM.hasSymbolicDisplacement())
 715     return true;
 716
 717   SDValue N0 = N.getOperand(0);
 718   CodeModel::Model M = TM.getCodeModel();
 719
 720   // Handle X86-64 rip-relative addresses.  We check this before checking direct
 721   // folding because RIP is preferable to non-RIP accesses.
 722   if (Subtarget->is64Bit() &&
 723       // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
 724       // they cannot be folded into immediate fields.
 725       // FIXME: This can be improved for kernel and other models?
 726       (M == CodeModel::Small || CodeModel::Kernel) &&
 727       // Base and index reg must be 0 in order to use %rip as base and lowering
 728       // must allow RIP.
 729       !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
 730     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
 731       int64_t Offset = AM.Disp + G->getOffset();
 732       if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
 733       AM.GV = G->getGlobal();
 734       AM.Disp = Offset;
 735       AM.SymbolFlags = G->getTargetFlags();
 736     } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
 737       int64_t Offset = AM.Disp + CP->getOffset();
 738       if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
 739       AM.CP = CP->getConstVal();
 740       AM.Align = CP->getAlignment();
 741       AM.Disp = Offset;
 742       AM.SymbolFlags = CP->getTargetFlags();
 743     } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
 744       AM.ES = S->getSymbol();
 745       AM.SymbolFlags = S->getTargetFlags();
 746     } else {
 747       JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
 748       AM.JT = J->getIndex();
 749       AM.SymbolFlags = J->getTargetFlags();
 750     }
 751
 752     if (N.getOpcode() == X86ISD::WrapperRIP)
 753       AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
 754     return false;
 755   }
 756
 757   // Handle the case when globals fit in our immediate field: This is true for
 758   // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
 759   // mode, this results in a non-RIP-relative computation.
 760   if (!Subtarget->is64Bit() ||
 761       ((M == CodeModel::Small || M == CodeModel::Kernel) &&
 762        TM.getRelocationModel() == Reloc::Static)) {
 763     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
 764       AM.GV = G->getGlobal();
 765       AM.Disp += G->getOffset();
 766       AM.SymbolFlags = G->getTargetFlags();
 767     } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
 768       AM.CP = CP->getConstVal();
 769       AM.Align = CP->getAlignment();
 770       AM.Disp += CP->getOffset();
 771       AM.SymbolFlags = CP->getTargetFlags();
 772     } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
 773       AM.ES = S->getSymbol();
 774       AM.SymbolFlags = S->getTargetFlags();
 775     } else {
 776       JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
 777       AM.JT = J->getIndex();
 778       AM.SymbolFlags = J->getTargetFlags();
 779     }
 780     return false;
 781   }
 782
 783   return true;
 784 }
 785
 786 /// MatchAddress - Add the specified node to the specified addressing mode,
 787 /// returning true if it cannot be done.  This just pattern matches for the
 788 /// addressing mode.
 789 bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
 790   if (MatchAddressRecursively(N, AM, 0))
 791     return true;
 792
 793   // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
 794   // a smaller encoding and avoids a scaled-index.
 795   if (AM.Scale == 2 &&
 796       AM.BaseType == X86ISelAddressMode::RegBase &&
 797       AM.Base.Reg.getNode() == 0) {
 798     AM.Base.Reg = AM.IndexReg;
 799     AM.Scale = 1;
 800   }
 801
 802   return false;
 803 }
 804
 805 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 806                                               unsigned Depth) {
 807   bool is64Bit = Subtarget->is64Bit();
 808   DebugLoc dl = N.getDebugLoc();
 809   DEBUG(errs() << "MatchAddress: "); DEBUG(AM.dump());
 810   // Limit recursion.
 811   if (Depth > 5)
 812     return MatchAddressBase(N, AM);
 813
 814   CodeModel::Model M = TM.getCodeModel();
 815
 816   // If this is already a %rip relative address, we can only merge immediates
 817   // into it.  Instead of handling this in every case, we handle it here.
 818   // RIP relative addressing: %rip + 32-bit displacement!
 819   if (AM.isRIPRelative()) {
 820     // FIXME: JumpTable and ExternalSymbol address currently don't like
 821     // displacements.  It isn't very important, but this should be fixed for
 822     // consistency.
 823     if (!AM.ES && AM.JT != -1) return true;
 824
 825     if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
 826       int64_t Val = AM.Disp + Cst->getSExtValue();
 827       if (X86::isOffsetSuitableForCodeModel(Val, M,
 828                                             AM.hasSymbolicDisplacement())) {
 829         AM.Disp = Val;
 830         return false;
 831       }
 832     }
 833     return true;
 834   }
 835
 836   switch (N.getOpcode()) {
 837   default: break;
 838   case ISD::Constant: {
 839     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
 840     if (!is64Bit ||
 841         X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
 842                                           AM.hasSymbolicDisplacement())) {
 843       AM.Disp += Val;
 844       return false;
 845     }
 846     break;
 847   }
 848
 849   case X86ISD::SegmentBaseAddress:
 850     if (!MatchSegmentBaseAddress(N, AM))
 851       return false;
 852     break;
 853
 854   case X86ISD::Wrapper:
 855   case X86ISD::WrapperRIP:
 856     if (!MatchWrapper(N, AM))
 857       return false;
 858     break;
 859
 860   case ISD::LOAD:
 861     if (!MatchLoad(N, AM))
 862       return false;
 863     break;
 864
 865   case ISD::FrameIndex:
 866     if (AM.BaseType == X86ISelAddressMode::RegBase
 867         && AM.Base.Reg.getNode() == 0) {
 868       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
 869       AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
 870       return false;
 871     }
 872     break;
 873
 874   case ISD::SHL:
 875     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
 876       break;
 877
 878     if (ConstantSDNode
 879           *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
 880       unsigned Val = CN->getZExtValue();
 881       // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
 882       // that the base operand remains free for further matching. If
 883       // the base doesn't end up getting used, a post-processing step
 884       // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
 885       if (Val == 1 || Val == 2 || Val == 3) {
 886         AM.Scale = 1 << Val;
 887         SDValue ShVal = N.getNode()->getOperand(0);
 888
 889         // Okay, we know that we have a scale by now.  However, if the scaled
 890         // value is an add of something and a constant, we can fold the
 891         // constant into the disp field here.
 892         if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
 893             isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
 894           AM.IndexReg = ShVal.getNode()->getOperand(0);
 895           ConstantSDNode *AddVal =
 896             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
 897           uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
 898           if (!is64Bit ||
 899               X86::isOffsetSuitableForCodeModel(Disp, M,
 900                                                 AM.hasSymbolicDisplacement()))
 901             AM.Disp = Disp;
 902           else
 903             AM.IndexReg = ShVal;
 904         } else {
 905           AM.IndexReg = ShVal;
 906         }
 907         return false;
 908       }
 909     break;
 910     }
 911
 912   case ISD::SMUL_LOHI:
 913   case ISD::UMUL_LOHI:
 914     // A mul_lohi where we need the low part can be folded as a plain multiply.
 915     if (N.getResNo() != 0) break;
 916     // FALL THROUGH
 917   case ISD::MUL:
 918   case X86ISD::MUL_IMM:
 919     // X*[3,5,9] -> X+X*[2,4,8]
 920     if (AM.BaseType == X86ISelAddressMode::RegBase &&
 921         AM.Base.Reg.getNode() == 0 &&
 922         AM.IndexReg.getNode() == 0) {
 923       if (ConstantSDNode
 924             *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
 925         if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
 926             CN->getZExtValue() == 9) {
 927           AM.Scale = unsigned(CN->getZExtValue())-1;
 928
 929           SDValue MulVal = N.getNode()->getOperand(0);
 930           SDValue Reg;
 931
 932           // Okay, we know that we have a scale by now.  However, if the scaled
 933           // value is an add of something and a constant, we can fold the
 934           // constant into the disp field here.
 935           if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
 936               isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
 937             Reg = MulVal.getNode()->getOperand(0);
 938             ConstantSDNode *AddVal =
 939               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
 940             uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
 941                                       CN->getZExtValue();
 942             if (!is64Bit ||
 943                 X86::isOffsetSuitableForCodeModel(Disp, M,
 944                                                   AM.hasSymbolicDisplacement()))
 945               AM.Disp = Disp;
 946             else
 947               Reg = N.getNode()->getOperand(0);
 948           } else {
 949             Reg = N.getNode()->getOperand(0);
 950           }
 951
 952           AM.IndexReg = AM.Base.Reg = Reg;
 953           return false;
 954         }
 955     }
 956     break;
 957
 958   case ISD::SUB: {
 959     // Given A-B, if A can be completely folded into the address and
 960     // the index field with the index field unused, use -B as the index.
 961     // This is a win if a has multiple parts that can be folded into
 962     // the address. Also, this saves a mov if the base register has
 963     // other uses, since it avoids a two-address sub instruction, however
 964     // it costs an additional mov if the index register has other uses.
 965
 966     // Test if the LHS of the sub can be folded.
 967     X86ISelAddressMode Backup = AM;
 968     if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
 969       AM = Backup;
 970       break;
 971     }
 972     // Test if the index field is free for use.
 973     if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
 974       AM = Backup;
 975       break;
 976     }
 977     int Cost = 0;
 978     SDValue RHS = N.getNode()->getOperand(1);
 979     // If the RHS involves a register with multiple uses, this
 980     // transformation incurs an extra mov, due to the neg instruction
 981     // clobbering its operand.
 982     if (!RHS.getNode()->hasOneUse() ||
 983         RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
 984         RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
 985         RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
 986         (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
 987          RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
 988       ++Cost;
 989     // If the base is a register with multiple uses, this
 990     // transformation may save a mov.
 991     if ((AM.BaseType == X86ISelAddressMode::RegBase &&
 992          AM.Base.Reg.getNode() &&
 993          !AM.Base.Reg.getNode()->hasOneUse()) ||
 994         AM.BaseType == X86ISelAddressMode::FrameIndexBase)
 995       --Cost;
 996     // If the folded LHS was interesting, this transformation saves
 997     // address arithmetic.
 998     if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
 999         ((AM.Disp != 0) && (Backup.Disp == 0)) +
1000         (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1001       --Cost;
1002     // If it doesn't look like it may be an overall win, don't do it.
1003     if (Cost >= 0) {
1004       AM = Backup;
1005       break;
1006     }
1007
1008     // Ok, the transformation is legal and appears profitable. Go for it.
1009     SDValue Zero = CurDAG->getConstant(0, N.getValueType());
1010     SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1011     AM.IndexReg = Neg;
1012     AM.Scale = 1;
1013
1014     // Insert the new nodes into the topological ordering.
1015     if (Zero.getNode()->getNodeId() == -1 ||
1016         Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1017       CurDAG->RepositionNode(N.getNode(), Zero.getNode());
1018       Zero.getNode()->setNodeId(N.getNode()->getNodeId());
1019     }
1020     if (Neg.getNode()->getNodeId() == -1 ||
1021         Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1022       CurDAG->RepositionNode(N.getNode(), Neg.getNode());
1023       Neg.getNode()->setNodeId(N.getNode()->getNodeId());
1024     }
1025     return false;
1026   }
1027
1028   case ISD::ADD: {
1029     X86ISelAddressMode Backup = AM;
1030     if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
1031         !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
1032       return false;
1033     AM = Backup;
1034     if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
1035         !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
1036       return false;
1037     AM = Backup;
1038
1039     // If we couldn't fold both operands into the address at the same time,
1040     // see if we can just put each operand into a register and fold at least
1041     // the add.
1042     if (AM.BaseType == X86ISelAddressMode::RegBase &&
1043         !AM.Base.Reg.getNode() &&
1044         !AM.IndexReg.getNode()) {
1045       AM.Base.Reg = N.getNode()->getOperand(0);
1046       AM.IndexReg = N.getNode()->getOperand(1);
1047       AM.Scale = 1;
1048       return false;
1049     }
1050     break;
1051   }
1052
1053   case ISD::OR:
1054     // Handle "X | C" as "X + C" iff X is known to have C bits clear.
1055     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1056       X86ISelAddressMode Backup = AM;
1057       uint64_t Offset = CN->getSExtValue();
1058       // Start with the LHS as an addr mode.
1059       if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1060           // Address could not have picked a GV address for the displacement.
1061           AM.GV == NULL &&
1062           // On x86-64, the resultant disp must fit in 32-bits.
1063           (!is64Bit ||
1064            X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
1065                                              AM.hasSymbolicDisplacement())) &&
1066           // Check to see if the LHS & C is zero.
1067           CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
1068         AM.Disp += Offset;
1069         return false;
1070       }
1071       AM = Backup;
1072     }
1073     break;
1074
1075   case ISD::AND: {
1076     // Perform some heroic transforms on an and of a constant-count shift
1077     // with a constant to enable use of the scaled offset field.
1078
1079     SDValue Shift = N.getOperand(0);
1080     if (Shift.getNumOperands() != 2) break;
1081
1082     // Scale must not be used already.
1083     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
1084
1085     SDValue X = Shift.getOperand(0);
1086     ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
1087     ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
1088     if (!C1 || !C2) break;
1089
1090     // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1091     // allows us to convert the shift and and into an h-register extract and
1092     // a scaled index.
1093     if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
1094       unsigned ScaleLog = 8 - C1->getZExtValue();
1095       if (ScaleLog > 0 && ScaleLog < 4 &&
1096           C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
1097         SDValue Eight = CurDAG->getConstant(8, MVT::i8);
1098         SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
1099         SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1100                                       X, Eight);
1101         SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
1102                                       Srl, Mask);
1103         SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
1104         SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1105                                       And, ShlCount);
1106
1107         // Insert the new nodes into the topological ordering.
1108         if (Eight.getNode()->getNodeId() == -1 ||
1109             Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1110           CurDAG->RepositionNode(X.getNode(), Eight.getNode());
1111           Eight.getNode()->setNodeId(X.getNode()->getNodeId());
1112         }
1113         if (Mask.getNode()->getNodeId() == -1 ||
1114             Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1115           CurDAG->RepositionNode(X.getNode(), Mask.getNode());
1116           Mask.getNode()->setNodeId(X.getNode()->getNodeId());
1117         }
1118         if (Srl.getNode()->getNodeId() == -1 ||
1119             Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1120           CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
1121           Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
1122         }
1123         if (And.getNode()->getNodeId() == -1 ||
1124             And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1125           CurDAG->RepositionNode(N.getNode(), And.getNode());
1126           And.getNode()->setNodeId(N.getNode()->getNodeId());
1127         }
1128         if (ShlCount.getNode()->getNodeId() == -1 ||
1129             ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1130           CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
1131           ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
1132         }
1133         if (Shl.getNode()->getNodeId() == -1 ||
1134             Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1135           CurDAG->RepositionNode(N.getNode(), Shl.getNode());
1136           Shl.getNode()->setNodeId(N.getNode()->getNodeId());
1137         }
1138         CurDAG->ReplaceAllUsesWith(N, Shl);
1139         AM.IndexReg = And;
1140         AM.Scale = (1 << ScaleLog);
1141         return false;
1142       }
1143     }
1144
1145     // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1146     // allows us to fold the shift into this addressing mode.
1147     if (Shift.getOpcode() != ISD::SHL) break;
1148
1149     // Not likely to be profitable if either the AND or SHIFT node has more
1150     // than one use (unless all uses are for address computation). Besides,
1151     // isel mechanism requires their node ids to be reused.
1152     if (!N.hasOneUse() || !Shift.hasOneUse())
1153       break;
1154
1155     // Verify that the shift amount is something we can fold.
1156     unsigned ShiftCst = C1->getZExtValue();
1157     if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
1158       break;
1159
1160     // Get the new AND mask, this folds to a constant.
1161     SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
1162                                          SDValue(C2, 0), SDValue(C1, 0));
1163     SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
1164                                      NewANDMask);
1165     SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
1166                                        NewAND, SDValue(C1, 0));
1167
1168     // Insert the new nodes into the topological ordering.
1169     if (C1->getNodeId() > X.getNode()->getNodeId()) {
1170       CurDAG->RepositionNode(X.getNode(), C1);
1171       C1->setNodeId(X.getNode()->getNodeId());
1172     }
1173     if (NewANDMask.getNode()->getNodeId() == -1 ||
1174         NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
1175       CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
1176       NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
1177     }
1178     if (NewAND.getNode()->getNodeId() == -1 ||
1179         NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
1180       CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
1181       NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
1182     }
1183     if (NewSHIFT.getNode()->getNodeId() == -1 ||
1184         NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
1185       CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
1186       NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
1187     }
1188
1189     CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
1190
1191     AM.Scale = 1 << ShiftCst;
1192     AM.IndexReg = NewAND;
1193     return false;
1194   }
1195   }
1196
1197   return MatchAddressBase(N, AM);
1198 }
1199
1200 /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1201 /// specified addressing mode without any further recursion.
1202 bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1203   // Is the base register already occupied?
1204   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
1205     // If so, check to see if the scale index register is set.
1206     if (AM.IndexReg.getNode() == 0) {
1207       AM.IndexReg = N;
1208       AM.Scale = 1;
1209       return false;
1210     }
1211
1212     // Otherwise, we cannot select it.
1213     return true;
1214   }
1215
1216   // Default, generate it as a register.
1217   AM.BaseType = X86ISelAddressMode::RegBase;
1218   AM.Base.Reg = N;
1219   return false;
1220 }
1221
1222 /// SelectAddr - returns true if it is able pattern match an addressing mode.
1223 /// It returns the operands which make up the maximal addressing mode it can
1224 /// match by reference.
1225 bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
1226                                  SDValue &Scale, SDValue &Index,
1227                                  SDValue &Disp, SDValue &Segment) {
1228   X86ISelAddressMode AM;
1229   bool Done = false;
1230   if (AvoidDupAddrCompute && !N.hasOneUse()) {
1231     unsigned Opcode = N.getOpcode();
1232     if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
1233         Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
1234       // If we are able to fold N into addressing mode, then we'll allow it even
1235       // if N has multiple uses. In general, addressing computation is used as
1236       // addresses by all of its uses. But watch out for CopyToReg uses, that
1237       // means the address computation is liveout. It will be computed by a LEA
1238       // so we want to avoid computing the address twice.
1239       for (SDNode::use_iterator UI = N.getNode()->use_begin(),
1240              UE = N.getNode()->use_end(); UI != UE; ++UI) {
1241         if (UI->getOpcode() == ISD::CopyToReg) {
1242           MatchAddressBase(N, AM);
1243           Done = true;
1244           break;
1245         }
1246       }
1247     }
1248   }
1249
1250   if (!Done && MatchAddress(N, AM))
1251     return false;
1252
1253   MVT VT = N.getValueType();
1254   if (AM.BaseType == X86ISelAddressMode::RegBase) {
1255     if (!AM.Base.Reg.getNode())
1256       AM.Base.Reg = CurDAG->getRegister(0, VT);
1257   }
1258
1259   if (!AM.IndexReg.getNode())
1260     AM.IndexReg = CurDAG->getRegister(0, VT);
1261
1262   getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1263   return true;
1264 }
1265
1266 /// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
1267 /// match a load whose top elements are either undef or zeros.  The load flavor
1268 /// is derived from the type of N, which is either v4f32 or v2f64.
1269 bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
1270                                           SDValue N, SDValue &Base,
1271                                           SDValue &Scale, SDValue &Index,
1272                                           SDValue &Disp, SDValue &Segment,
1273                                           SDValue &InChain,
1274                                           SDValue &OutChain) {
1275   if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1276     InChain = N.getOperand(0).getValue(1);
1277     if (ISD::isNON_EXTLoad(InChain.getNode()) &&
1278         InChain.getValue(0).hasOneUse() &&
1279         N.hasOneUse() &&
1280         IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
1281       LoadSDNode *LD = cast<LoadSDNode>(InChain);
1282       if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1283         return false;
1284       OutChain = LD->getChain();
1285       return true;
1286     }
1287   }
1288
1289   // Also handle the case where we explicitly require zeros in the top
1290   // elements.  This is a vector shuffle from the zero vector.
1291   if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1292       // Check to see if the top elements are all zeros (or bitcast of zeros).
1293       N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1294       N.getOperand(0).getNode()->hasOneUse() &&
1295       ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
1296       N.getOperand(0).getOperand(0).hasOneUse()) {
1297     // Okay, this is a zero extending load.  Fold it.
1298     LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
1299     if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
1300       return false;
1301     OutChain = LD->getChain();
1302     InChain = SDValue(LD, 1);
1303     return true;
1304   }
1305   return false;
1306 }
1307
1308
1309 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1310 /// mode it matches can be cost effectively emitted as an LEA instruction.
1311 bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
1312                                     SDValue &Base, SDValue &Scale,
1313                                     SDValue &Index, SDValue &Disp) {
1314   X86ISelAddressMode AM;
1315
1316   // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1317   // segments.
1318   SDValue Copy = AM.Segment;
1319   SDValue T = CurDAG->getRegister(0, MVT::i32);
1320   AM.Segment = T;
1321   if (MatchAddress(N, AM))
1322     return false;
1323   assert (T == AM.Segment);
1324   AM.Segment = Copy;
1325
1326   MVT VT = N.getValueType();
1327   unsigned Complexity = 0;
1328   if (AM.BaseType == X86ISelAddressMode::RegBase)
1329     if (AM.Base.Reg.getNode())
1330       Complexity = 1;
1331     else
1332       AM.Base.Reg = CurDAG->getRegister(0, VT);
1333   else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1334     Complexity = 4;
1335
1336   if (AM.IndexReg.getNode())
1337     Complexity++;
1338   else
1339     AM.IndexReg = CurDAG->getRegister(0, VT);
1340
1341   // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1342   // a simple shift.
1343   if (AM.Scale > 1)
1344     Complexity++;
1345
1346   // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1347   // to a LEA. This is determined with some expermentation but is by no means
1348   // optimal (especially for code size consideration). LEA is nice because of
1349   // its three-address nature. Tweak the cost function again when we can run
1350   // convertToThreeAddress() at register allocation time.
1351   if (AM.hasSymbolicDisplacement()) {
1352     // For X86-64, we should always use lea to materialize RIP relative
1353     // addresses.
1354     if (Subtarget->is64Bit())
1355       Complexity = 4;
1356     else
1357       Complexity += 2;
1358   }
1359
1360   if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
1361     Complexity++;
1362
1363   // If it isn't worth using an LEA, reject it.
1364   if (Complexity <= 2)
1365     return false;
1366
1367   SDValue Segment;
1368   getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1369   return true;
1370 }
1371
1372 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1373 bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
1374                                         SDValue &Scale, SDValue &Index,
1375                                         SDValue &Disp) {
1376   assert(Op.getOpcode() == X86ISD::TLSADDR);
1377   assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1378   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1379
1380   X86ISelAddressMode AM;
1381   AM.GV = GA->getGlobal();
1382   AM.Disp += GA->getOffset();
1383   AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
1384   AM.SymbolFlags = GA->getTargetFlags();
1385
1386   if (N.getValueType() == MVT::i32) {
1387     AM.Scale = 1;
1388     AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1389   } else {
1390     AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1391   }
1392
1393   SDValue Segment;
1394   getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
1395   return true;
1396 }
1397
1398
1399 bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
1400                                   SDValue &Base, SDValue &Scale,
1401                                   SDValue &Index, SDValue &Disp,
1402                                   SDValue &Segment) {
1403   if (ISD::isNON_EXTLoad(N.getNode()) &&
1404       N.hasOneUse() &&
1405       IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
1406     return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
1407   return false;
1408 }
1409
1410 /// getGlobalBaseReg - Return an SDNode that returns the value of
1411 /// the global base register. Output instructions required to
1412 /// initialize the global base register, if necessary.
1413 ///
1414 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1415   unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1416   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
1417 }
1418
1419 static SDNode *FindCallStartFromCall(SDNode *Node) {
1420   if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
1421     assert(Node->getOperand(0).getValueType() == MVT::Other &&
1422          "Node doesn't have a token chain argument!");
1423   return FindCallStartFromCall(Node->getOperand(0).getNode());
1424 }
1425
1426 SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
1427   SDValue Chain = Node->getOperand(0);
1428   SDValue In1 = Node->getOperand(1);
1429   SDValue In2L = Node->getOperand(2);
1430   SDValue In2H = Node->getOperand(3);
1431   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1432   if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1433     return NULL;
1434   SDValue LSI = Node->getOperand(4);    // MemOperand
1435   const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
1436   return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
1437                                MVT::i32, MVT::i32, MVT::Other, Ops,
1438                                array_lengthof(Ops));
1439 }
1440
1441 SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, MVT NVT) {
1442   if (Node->hasAnyUseOfValue(0))
1443     return 0;
1444
1445   // Optimize common patterns for __sync_add_and_fetch and
1446   // __sync_sub_and_fetch where the result is not used. This allows us
1447   // to use "lock" version of add, sub, inc, dec instructions.
1448   // FIXME: Do not use special instructions but instead add the "lock"
1449   // prefix to the target node somehow. The extra information will then be
1450   // transferred to machine instruction and it denotes the prefix.
1451   SDValue Chain = Node->getOperand(0);
1452   SDValue Ptr = Node->getOperand(1);
1453   SDValue Val = Node->getOperand(2);
1454   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1455   if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
1456     return 0;
1457
1458   bool isInc = false, isDec = false, isSub = false, isCN = false;
1459   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
1460   if (CN) {
1461     isCN = true;
1462     int64_t CNVal = CN->getSExtValue();
1463     if (CNVal == 1)
1464       isInc = true;
1465     else if (CNVal == -1)
1466       isDec = true;
1467     else if (CNVal >= 0)
1468       Val = CurDAG->getTargetConstant(CNVal, NVT);
1469     else {
1470       isSub = true;
1471       Val = CurDAG->getTargetConstant(-CNVal, NVT);
1472     }
1473   } else if (Val.hasOneUse() &&
1474              Val.getOpcode() == ISD::SUB &&
1475              X86::isZeroNode(Val.getOperand(0))) {
1476     isSub = true;
1477     Val = Val.getOperand(1);
1478   }
1479
1480   unsigned Opc = 0;
1481   switch (NVT.getSimpleVT()) {
1482   default: return 0;
1483   case MVT::i8:
1484     if (isInc)
1485       Opc = X86::LOCK_INC8m;
1486     else if (isDec)
1487       Opc = X86::LOCK_DEC8m;
1488     else if (isSub) {
1489       if (isCN)
1490         Opc = X86::LOCK_SUB8mi;
1491       else
1492         Opc = X86::LOCK_SUB8mr;
1493     } else {
1494       if (isCN)
1495         Opc = X86::LOCK_ADD8mi;
1496       else
1497         Opc = X86::LOCK_ADD8mr;
1498     }
1499     break;
1500   case MVT::i16:
1501     if (isInc)
1502       Opc = X86::LOCK_INC16m;
1503     else if (isDec)
1504       Opc = X86::LOCK_DEC16m;
1505     else if (isSub) {
1506       if (isCN) {
1507         if (Predicate_i16immSExt8(Val.getNode()))
1508           Opc = X86::LOCK_SUB16mi8;
1509         else
1510           Opc = X86::LOCK_SUB16mi;
1511       } else
1512         Opc = X86::LOCK_SUB16mr;
1513     } else {
1514       if (isCN) {
1515         if (Predicate_i16immSExt8(Val.getNode()))
1516           Opc = X86::LOCK_ADD16mi8;
1517         else
1518           Opc = X86::LOCK_ADD16mi;
1519       } else
1520         Opc = X86::LOCK_ADD16mr;
1521     }
1522     break;
1523   case MVT::i32:
1524     if (isInc)
1525       Opc = X86::LOCK_INC32m;
1526     else if (isDec)
1527       Opc = X86::LOCK_DEC32m;
1528     else if (isSub) {
1529       if (isCN) {
1530         if (Predicate_i32immSExt8(Val.getNode()))
1531           Opc = X86::LOCK_SUB32mi8;
1532         else
1533           Opc = X86::LOCK_SUB32mi;
1534       } else
1535         Opc = X86::LOCK_SUB32mr;
1536     } else {
1537       if (isCN) {
1538         if (Predicate_i32immSExt8(Val.getNode()))
1539           Opc = X86::LOCK_ADD32mi8;
1540         else
1541           Opc = X86::LOCK_ADD32mi;
1542       } else
1543         Opc = X86::LOCK_ADD32mr;
1544     }
1545     break;
1546   case MVT::i64:
1547     if (isInc)
1548       Opc = X86::LOCK_INC64m;
1549     else if (isDec)
1550       Opc = X86::LOCK_DEC64m;
1551     else if (isSub) {
1552       Opc = X86::LOCK_SUB64mr;
1553       if (isCN) {
1554         if (Predicate_i64immSExt8(Val.getNode()))
1555           Opc = X86::LOCK_SUB64mi8;
1556         else if (Predicate_i64immSExt32(Val.getNode()))
1557           Opc = X86::LOCK_SUB64mi32;
1558       }
1559     } else {
1560       Opc = X86::LOCK_ADD64mr;
1561       if (isCN) {
1562         if (Predicate_i64immSExt8(Val.getNode()))
1563           Opc = X86::LOCK_ADD64mi8;
1564         else if (Predicate_i64immSExt32(Val.getNode()))
1565           Opc = X86::LOCK_ADD64mi32;
1566       }
1567     }
1568     break;
1569   }
1570
1571   DebugLoc dl = Node->getDebugLoc();
1572   SDValue Undef = SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
1573                                                 dl, NVT), 0);
1574   SDValue MemOp = CurDAG->getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
1575   if (isInc || isDec) {
1576     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, MemOp, Chain };
1577     SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7), 0);
1578     SDValue RetVals[] = { Undef, Ret };
1579     return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
1580   } else {
1581     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, MemOp, Chain };
1582     SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8), 0);
1583     SDValue RetVals[] = { Undef, Ret };
1584     return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
1585   }
1586 }
1587
1588 SDNode *X86DAGToDAGISel::Select(SDValue N) {
1589   SDNode *Node = N.getNode();
1590   MVT NVT = Node->getValueType(0);
1591   unsigned Opc, MOpc;
1592   unsigned Opcode = Node->getOpcode();
1593   DebugLoc dl = Node->getDebugLoc();
1594
1595 #ifndef NDEBUG
1596   DEBUG(errs() << std::string(Indent, ' ') << "Selecting: ");
1597   DEBUG(Node->dump(CurDAG));
1598   DEBUG(errs() << "\n");
1599   Indent += 2;
1600 #endif
1601
1602   if (Node->isMachineOpcode()) {
1603 #ifndef NDEBUG
1604     DEBUG(errs() << std::string(Indent-2, ' ') << "== ");
1605     DEBUG(Node->dump(CurDAG));
1606     DEBUG(errs() << "\n");
1607     Indent -= 2;
1608 #endif
1609     return NULL;   // Already selected.
1610   }
1611
1612   switch (Opcode) {
1613   default: break;
1614   case X86ISD::GlobalBaseReg:
1615     return getGlobalBaseReg();
1616
1617   case X86ISD::ATOMOR64_DAG:
1618     return SelectAtomic64(Node, X86::ATOMOR6432);
1619   case X86ISD::ATOMXOR64_DAG:
1620     return SelectAtomic64(Node, X86::ATOMXOR6432);
1621   case X86ISD::ATOMADD64_DAG:
1622     return SelectAtomic64(Node, X86::ATOMADD6432);
1623   case X86ISD::ATOMSUB64_DAG:
1624     return SelectAtomic64(Node, X86::ATOMSUB6432);
1625   case X86ISD::ATOMNAND64_DAG:
1626     return SelectAtomic64(Node, X86::ATOMNAND6432);
1627   case X86ISD::ATOMAND64_DAG:
1628     return SelectAtomic64(Node, X86::ATOMAND6432);
1629   case X86ISD::ATOMSWAP64_DAG:
1630     return SelectAtomic64(Node, X86::ATOMSWAP6432);
1631
1632   case ISD::ATOMIC_LOAD_ADD: {
1633     SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
1634     if (RetVal)
1635       return RetVal;
1636     break;
1637   }
1638
1639   case ISD::SMUL_LOHI:
1640   case ISD::UMUL_LOHI: {
1641     SDValue N0 = Node->getOperand(0);
1642     SDValue N1 = Node->getOperand(1);
1643
1644     bool isSigned = Opcode == ISD::SMUL_LOHI;
1645     if (!isSigned)
1646       switch (NVT.getSimpleVT()) {
1647       default: llvm_unreachable("Unsupported VT!");
1648       case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
1649       case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
1650       case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
1651       case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
1652       }
1653     else
1654       switch (NVT.getSimpleVT()) {
1655       default: llvm_unreachable("Unsupported VT!");
1656       case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
1657       case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
1658       case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
1659       case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
1660       }
1661
1662     unsigned LoReg, HiReg;
1663     switch (NVT.getSimpleVT()) {
1664     default: llvm_unreachable("Unsupported VT!");
1665     case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
1666     case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
1667     case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
1668     case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
1669     }
1670
1671     SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1672     bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1673     // multiplty is commmutative
1674     if (!foldedLoad) {
1675       foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1676       if (foldedLoad)
1677         std::swap(N0, N1);
1678     }
1679
1680     SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
1681                                             N0, SDValue()).getValue(1);
1682
1683     if (foldedLoad) {
1684       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1685                         InFlag };
1686       SDNode *CNode =
1687         CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1688                               array_lengthof(Ops));
1689       InFlag = SDValue(CNode, 1);
1690       // Update the chain.
1691       ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1692     } else {
1693       InFlag =
1694         SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1695     }
1696
1697     // Copy the low half of the result, if it is needed.
1698     if (!N.getValue(0).use_empty()) {
1699       SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1700                                                 LoReg, NVT, InFlag);
1701       InFlag = Result.getValue(2);
1702       ReplaceUses(N.getValue(0), Result);
1703 #ifndef NDEBUG
1704       DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
1705       DEBUG(Result.getNode()->dump(CurDAG));
1706       DEBUG(errs() << "\n");
1707 #endif
1708     }
1709     // Copy the high half of the result, if it is needed.
1710     if (!N.getValue(1).use_empty()) {
1711       SDValue Result;
1712       if (HiReg == X86::AH && Subtarget->is64Bit()) {
1713         // Prevent use of AH in a REX instruction by referencing AX instead.
1714         // Shift it down 8 bits.
1715         Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1716                                         X86::AX, MVT::i16, InFlag);
1717         InFlag = Result.getValue(2);
1718         Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1719                                                Result,
1720                                    CurDAG->getTargetConstant(8, MVT::i8)), 0);
1721         // Then truncate it down to i8.
1722         SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1723         Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1724                                                  MVT::i8, Result, SRIdx), 0);
1725       } else {
1726         Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1727                                         HiReg, NVT, InFlag);
1728         InFlag = Result.getValue(2);
1729       }
1730       ReplaceUses(N.getValue(1), Result);
1731 #ifndef NDEBUG
1732       DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
1733       DEBUG(Result.getNode()->dump(CurDAG));
1734       DEBUG(errs() << "\n");
1735 #endif
1736     }
1737
1738 #ifndef NDEBUG
1739     Indent -= 2;
1740 #endif
1741
1742     return NULL;
1743   }
1744
1745   case ISD::SDIVREM:
1746   case ISD::UDIVREM: {
1747     SDValue N0 = Node->getOperand(0);
1748     SDValue N1 = Node->getOperand(1);
1749
1750     bool isSigned = Opcode == ISD::SDIVREM;
1751     if (!isSigned)
1752       switch (NVT.getSimpleVT()) {
1753       default: llvm_unreachable("Unsupported VT!");
1754       case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
1755       case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
1756       case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
1757       case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
1758       }
1759     else
1760       switch (NVT.getSimpleVT()) {
1761       default: llvm_unreachable("Unsupported VT!");
1762       case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
1763       case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
1764       case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
1765       case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
1766       }
1767
1768     unsigned LoReg, HiReg;
1769     unsigned ClrOpcode, SExtOpcode;
1770     switch (NVT.getSimpleVT()) {
1771     default: llvm_unreachable("Unsupported VT!");
1772     case MVT::i8:
1773       LoReg = X86::AL;  HiReg = X86::AH;
1774       ClrOpcode  = 0;
1775       SExtOpcode = X86::CBW;
1776       break;
1777     case MVT::i16:
1778       LoReg = X86::AX;  HiReg = X86::DX;
1779       ClrOpcode  = X86::MOV16r0;
1780       SExtOpcode = X86::CWD;
1781       break;
1782     case MVT::i32:
1783       LoReg = X86::EAX; HiReg = X86::EDX;
1784       ClrOpcode  = X86::MOV32r0;
1785       SExtOpcode = X86::CDQ;
1786       break;
1787     case MVT::i64:
1788       LoReg = X86::RAX; HiReg = X86::RDX;
1789       ClrOpcode  = ~0U; // NOT USED.
1790       SExtOpcode = X86::CQO;
1791       break;
1792     }
1793
1794     SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
1795     bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
1796     bool signBitIsZero = CurDAG->SignBitIsZero(N0);
1797
1798     SDValue InFlag;
1799     if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
1800       // Special case for div8, just use a move with zero extension to AX to
1801       // clear the upper 8 bits (AH).
1802       SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
1803       if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1804         SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
1805         Move =
1806           SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
1807                                         MVT::Other, Ops,
1808                                         array_lengthof(Ops)), 0);
1809         Chain = Move.getValue(1);
1810         ReplaceUses(N0.getValue(1), Chain);
1811       } else {
1812         Move =
1813           SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
1814         Chain = CurDAG->getEntryNode();
1815       }
1816       Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
1817       InFlag = Chain.getValue(1);
1818     } else {
1819       InFlag =
1820         CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
1821                              LoReg, N0, SDValue()).getValue(1);
1822       if (isSigned && !signBitIsZero) {
1823         // Sign extend the low part into the high part.
1824         InFlag =
1825           SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
1826       } else {
1827         // Zero out the high part, effectively zero extending the input.
1828         SDValue ClrNode;
1829
1830         if (NVT.getSimpleVT() == MVT::i64) {
1831           ClrNode = SDValue(CurDAG->getTargetNode(X86::MOV32r0, dl, MVT::i32),
1832                             0);
1833           // We just did a 32-bit clear, insert it into a 64-bit register to
1834           // clear the whole 64-bit reg.
1835           SDValue Undef =
1836             SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
1837                                           dl, MVT::i64), 0);
1838           SDValue SubRegNo =
1839             CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
1840           ClrNode =
1841             SDValue(CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl,
1842                                           MVT::i64, Undef, ClrNode, SubRegNo),
1843                     0);
1844         } else {
1845           ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT), 0);
1846         }
1847
1848         InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
1849                                       ClrNode, InFlag).getValue(1);
1850       }
1851     }
1852
1853     if (foldedLoad) {
1854       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
1855                         InFlag };
1856       SDNode *CNode =
1857         CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
1858                               array_lengthof(Ops));
1859       InFlag = SDValue(CNode, 1);
1860       // Update the chain.
1861       ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
1862     } else {
1863       InFlag =
1864         SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
1865     }
1866
1867     // Copy the division (low) result, if it is needed.
1868     if (!N.getValue(0).use_empty()) {
1869       SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1870                                                 LoReg, NVT, InFlag);
1871       InFlag = Result.getValue(2);
1872       ReplaceUses(N.getValue(0), Result);
1873 #ifndef NDEBUG
1874       DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
1875       DEBUG(Result.getNode()->dump(CurDAG));
1876       DEBUG(errs() << "\n");
1877 #endif
1878     }
1879     // Copy the remainder (high) result, if it is needed.
1880     if (!N.getValue(1).use_empty()) {
1881       SDValue Result;
1882       if (HiReg == X86::AH && Subtarget->is64Bit()) {
1883         // Prevent use of AH in a REX instruction by referencing AX instead.
1884         // Shift it down 8 bits.
1885         Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1886                                         X86::AX, MVT::i16, InFlag);
1887         InFlag = Result.getValue(2);
1888         Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
1889                                       Result,
1890                                       CurDAG->getTargetConstant(8, MVT::i8)),
1891                          0);
1892         // Then truncate it down to i8.
1893         SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
1894         Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
1895                                                  MVT::i8, Result, SRIdx), 0);
1896       } else {
1897         Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
1898                                         HiReg, NVT, InFlag);
1899         InFlag = Result.getValue(2);
1900       }
1901       ReplaceUses(N.getValue(1), Result);
1902 #ifndef NDEBUG
1903       DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
1904       DEBUG(Result.getNode()->dump(CurDAG));
1905       DEBUG(errs() << "\n");
1906 #endif
1907     }
1908
1909 #ifndef NDEBUG
1910     Indent -= 2;
1911 #endif
1912
1913     return NULL;
1914   }
1915
1916   case ISD::DECLARE: {
1917     // Handle DECLARE nodes here because the second operand may have been
1918     // wrapped in X86ISD::Wrapper.
1919     SDValue Chain = Node->getOperand(0);
1920     SDValue N1 = Node->getOperand(1);
1921     SDValue N2 = Node->getOperand(2);
1922     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
1923
1924     // FIXME: We need to handle this for VLAs.
1925     if (!FINode) {
1926       ReplaceUses(N.getValue(0), Chain);
1927       return NULL;
1928     }
1929
1930     if (N2.getOpcode() == ISD::ADD &&
1931         N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
1932       N2 = N2.getOperand(1);
1933
1934     // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
1935     // somehow, just ignore it.
1936     if (N2.getOpcode() != X86ISD::Wrapper &&
1937         N2.getOpcode() != X86ISD::WrapperRIP) {
1938       ReplaceUses(N.getValue(0), Chain);
1939       return NULL;
1940     }
1941     GlobalAddressSDNode *GVNode =
1942       dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
1943     if (GVNode == 0) {
1944       ReplaceUses(N.getValue(0), Chain);
1945       return NULL;
1946     }
1947     SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
1948                                                TLI.getPointerTy());
1949     SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
1950                                                   TLI.getPointerTy());
1951     SDValue Ops[] = { Tmp1, Tmp2, Chain };
1952     return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
1953                                  MVT::Other, Ops,
1954                                  array_lengthof(Ops));
1955   }
1956   }
1957
1958   SDNode *ResNode = SelectCode(N);
1959
1960 #ifndef NDEBUG
1961   DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
1962   if (ResNode == NULL || ResNode == N.getNode())
1963     DEBUG(N.getNode()->dump(CurDAG));
1964   else
1965     DEBUG(ResNode->dump(CurDAG));
1966   DEBUG(errs() << "\n");
1967   Indent -= 2;
1968 #endif
1969
1970   return ResNode;
1971 }
1972
1973 bool X86DAGToDAGISel::
1974 SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
1975                              std::vector<SDValue> &OutOps) {
1976   SDValue Op0, Op1, Op2, Op3, Op4;
1977   switch (ConstraintCode) {
1978   case 'o':   // offsetable        ??
1979   case 'v':   // not offsetable    ??
1980   default: return true;
1981   case 'm':   // memory
1982     if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
1983       return true;
1984     break;
1985   }
1986
1987   OutOps.push_back(Op0);
1988   OutOps.push_back(Op1);
1989   OutOps.push_back(Op2);
1990   OutOps.push_back(Op3);
1991   OutOps.push_back(Op4);
1992   return false;
1993 }
1994
1995 /// createX86ISelDag - This pass converts a legalized DAG into a
1996 /// X86-specific DAG, ready for instruction scheduling.
1997 ///
1998 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
1999                                      llvm::CodeGenOpt::Level OptLevel) {
2000   return new X86DAGToDAGISel(TM, OptLevel);
2001 }