lib/Target/X86/X86ISelPattern.cpp

   1 //===-- X86ISelPattern.cpp - A pattern matching inst selector for X86 -----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by the LLVM research group and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a pattern matching instruction selector for X86.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "X86.h"
  15 #include "X86InstrBuilder.h"
  16 #include "X86RegisterInfo.h"
  17 #include "llvm/Constants.h"                   // FIXME: REMOVE
  18 #include "llvm/Function.h"
  19 #include "llvm/CodeGen/MachineConstantPool.h" // FIXME: REMOVE
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineFrameInfo.h"
  22 #include "llvm/CodeGen/SelectionDAG.h"
  23 #include "llvm/CodeGen/SelectionDAGISel.h"
  24 #include "llvm/CodeGen/SSARegMap.h"
  25 #include "llvm/Target/TargetData.h"
  26 #include "llvm/Target/TargetLowering.h"
  27 #include "llvm/Support/MathExtras.h"
  28 #include "llvm/ADT/Statistic.h"
  29 #include <set>
  30 using namespace llvm;
  31
  32 //===----------------------------------------------------------------------===//
  33 //  X86TargetLowering - X86 Implementation of the TargetLowering interface
  34 namespace {
  35   class X86TargetLowering : public TargetLowering {
  36     int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
  37     int ReturnAddrIndex;              // FrameIndex for return slot.
  38   public:
  39     X86TargetLowering(TargetMachine &TM) : TargetLowering(TM) {
  40       // Set up the TargetLowering object.
  41       addRegisterClass(MVT::i8, X86::R8RegisterClass);
  42       addRegisterClass(MVT::i16, X86::R16RegisterClass);
  43       addRegisterClass(MVT::i32, X86::R32RegisterClass);
  44       addRegisterClass(MVT::f64, X86::RFPRegisterClass);
  45
  46       // FIXME: Eliminate these two classes when legalize can handle promotions
  47       // well.
  48       addRegisterClass(MVT::i1, X86::R8RegisterClass);
  49       addRegisterClass(MVT::f32, X86::RFPRegisterClass);
  50
  51       computeRegisterProperties();
  52
  53       setOperationUnsupported(ISD::MUL, MVT::i8);
  54       setOperationUnsupported(ISD::SELECT, MVT::i1);
  55       setOperationUnsupported(ISD::SELECT, MVT::i8);
  56
  57       addLegalFPImmediate(+0.0); // FLD0
  58       addLegalFPImmediate(+1.0); // FLD1
  59       addLegalFPImmediate(-0.0); // FLD0/FCHS
  60       addLegalFPImmediate(-1.0); // FLD1/FCHS
  61     }
  62
  63     /// LowerArguments - This hook must be implemented to indicate how we should
  64     /// lower the arguments for the specified function, into the specified DAG.
  65     virtual std::vector<SDOperand>
  66     LowerArguments(Function &F, SelectionDAG &DAG);
  67
  68     /// LowerCallTo - This hook lowers an abstract call to a function into an
  69     /// actual call.
  70     virtual std::pair<SDOperand, SDOperand>
  71     LowerCallTo(SDOperand Chain, const Type *RetTy, SDOperand Callee,
  72                 ArgListTy &Args, SelectionDAG &DAG);
  73
  74     virtual std::pair<SDOperand, SDOperand>
  75     LowerVAStart(SDOperand Chain, SelectionDAG &DAG);
  76
  77     virtual std::pair<SDOperand,SDOperand>
  78     LowerVAArgNext(bool isVANext, SDOperand Chain, SDOperand VAList,
  79                    const Type *ArgTy, SelectionDAG &DAG);
  80
  81     virtual std::pair<SDOperand, SDOperand>
  82     LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
  83                             SelectionDAG &DAG);
  84   };
  85 }
  86
  87
  88 std::vector<SDOperand>
  89 X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  90   std::vector<SDOperand> ArgValues;
  91
  92   // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  93   // the stack frame looks like this:
  94   //
  95   // [ESP] -- return address
  96   // [ESP + 4] -- first argument (leftmost lexically)
  97   // [ESP + 8] -- second argument, if first argument is four bytes in size
  98   //    ...
  99   //
 100   MachineFunction &MF = DAG.getMachineFunction();
 101   MachineFrameInfo *MFI = MF.getFrameInfo();
 102
 103   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
 104   for (Function::aiterator I = F.abegin(), E = F.aend(); I != E; ++I) {
 105     MVT::ValueType ObjectVT = getValueType(I->getType());
 106     unsigned ArgIncrement = 4;
 107     unsigned ObjSize;
 108     switch (ObjectVT) {
 109     default: assert(0 && "Unhandled argument type!");
 110     case MVT::i1:
 111     case MVT::i8:  ObjSize = 1;                break;
 112     case MVT::i16: ObjSize = 2;                break;
 113     case MVT::i32: ObjSize = 4;                break;
 114     case MVT::i64: ObjSize = ArgIncrement = 8; break;
 115     case MVT::f32: ObjSize = 4;                break;
 116     case MVT::f64: ObjSize = ArgIncrement = 8; break;
 117     }
 118     // Create the frame index object for this incoming parameter...
 119     int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
 120
 121     // Create the SelectionDAG nodes corresponding to a load from this parameter
 122     SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
 123
 124     // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
 125     // dead loads.
 126     SDOperand ArgValue;
 127     if (!I->use_empty())
 128       ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN);
 129     else {
 130       if (MVT::isInteger(ObjectVT))
 131         ArgValue = DAG.getConstant(0, ObjectVT);
 132       else
 133         ArgValue = DAG.getConstantFP(0, ObjectVT);
 134     }
 135     ArgValues.push_back(ArgValue);
 136
 137     ArgOffset += ArgIncrement;   // Move on to the next argument...
 138   }
 139
 140   // If the function takes variable number of arguments, make a frame index for
 141   // the start of the first vararg value... for expansion of llvm.va_start.
 142   if (F.isVarArg())
 143     VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
 144   ReturnAddrIndex = 0;  // No return address slot generated yet.
 145   return ArgValues;
 146 }
 147
 148 std::pair<SDOperand, SDOperand>
 149 X86TargetLowering::LowerCallTo(SDOperand Chain,
 150                                const Type *RetTy, SDOperand Callee,
 151                                ArgListTy &Args, SelectionDAG &DAG) {
 152   // Count how many bytes are to be pushed on the stack.
 153   unsigned NumBytes = 0;
 154
 155   if (Args.empty()) {
 156     // Save zero bytes.
 157     Chain = DAG.getNode(ISD::ADJCALLSTACKDOWN, MVT::Other, Chain,
 158                         DAG.getConstant(0, getPointerTy()));
 159   } else {
 160     for (unsigned i = 0, e = Args.size(); i != e; ++i)
 161       switch (getValueType(Args[i].second)) {
 162       default: assert(0 && "Unknown value type!");
 163       case MVT::i1:
 164       case MVT::i8:
 165       case MVT::i16:
 166       case MVT::i32:
 167       case MVT::f32:
 168         NumBytes += 4;
 169         break;
 170       case MVT::i64:
 171       case MVT::f64:
 172         NumBytes += 8;
 173         break;
 174       }
 175
 176     Chain = DAG.getNode(ISD::ADJCALLSTACKDOWN, MVT::Other, Chain,
 177                         DAG.getConstant(NumBytes, getPointerTy()));
 178
 179     // Arguments go on the stack in reverse order, as specified by the ABI.
 180     unsigned ArgOffset = 0;
 181     SDOperand StackPtr = DAG.getCopyFromReg(X86::ESP, MVT::i32);
 182     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
 183       unsigned ArgReg;
 184       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
 185       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
 186
 187       switch (getValueType(Args[i].second)) {
 188       default: assert(0 && "Unexpected ValueType for argument!");
 189       case MVT::i1:
 190       case MVT::i8:
 191       case MVT::i16:
 192         // Promote the integer to 32 bits.  If the input type is signed use a
 193         // sign extend, otherwise use a zero extend.
 194         if (Args[i].second->isSigned())
 195           Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
 196         else
 197           Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
 198
 199         // FALL THROUGH
 200       case MVT::i32:
 201       case MVT::f32:
 202         // FIXME: Note that all of these stores are independent of each other.
 203         Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain,
 204                             Args[i].first, PtrOff);
 205         ArgOffset += 4;
 206         break;
 207       case MVT::i64:
 208       case MVT::f64:
 209         // FIXME: Note that all of these stores are independent of each other.
 210         Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain,
 211                             Args[i].first, PtrOff);
 212         ArgOffset += 8;
 213         break;
 214       }
 215     }
 216   }
 217
 218   std::vector<MVT::ValueType> RetVals;
 219   MVT::ValueType RetTyVT = getValueType(RetTy);
 220   if (RetTyVT != MVT::isVoid)
 221     RetVals.push_back(RetTyVT);
 222   RetVals.push_back(MVT::Other);
 223
 224   SDOperand TheCall = SDOperand(DAG.getCall(RetVals, Chain, Callee), 0);
 225   Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
 226   Chain = DAG.getNode(ISD::ADJCALLSTACKUP, MVT::Other, Chain,
 227                       DAG.getConstant(NumBytes, getPointerTy()));
 228   return std::make_pair(TheCall, Chain);
 229 }
 230
 231 std::pair<SDOperand, SDOperand>
 232 X86TargetLowering::LowerVAStart(SDOperand Chain, SelectionDAG &DAG) {
 233   // vastart just returns the address of the VarArgsFrameIndex slot.
 234   return std::make_pair(DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32), Chain);
 235 }
 236
 237 std::pair<SDOperand,SDOperand> X86TargetLowering::
 238 LowerVAArgNext(bool isVANext, SDOperand Chain, SDOperand VAList,
 239                const Type *ArgTy, SelectionDAG &DAG) {
 240   MVT::ValueType ArgVT = getValueType(ArgTy);
 241   SDOperand Result;
 242   if (!isVANext) {
 243     Result = DAG.getLoad(ArgVT, DAG.getEntryNode(), VAList);
 244   } else {
 245     unsigned Amt;
 246     if (ArgVT == MVT::i32)
 247       Amt = 4;
 248     else {
 249       assert((ArgVT == MVT::i64 || ArgVT == MVT::f64) &&
 250              "Other types should have been promoted for varargs!");
 251       Amt = 8;
 252     }
 253     Result = DAG.getNode(ISD::ADD, VAList.getValueType(), VAList,
 254                          DAG.getConstant(Amt, VAList.getValueType()));
 255   }
 256   return std::make_pair(Result, Chain);
 257 }
 258
 259
 260 std::pair<SDOperand, SDOperand> X86TargetLowering::
 261 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
 262                         SelectionDAG &DAG) {
 263   SDOperand Result;
 264   if (Depth)        // Depths > 0 not supported yet!
 265     Result = DAG.getConstant(0, getPointerTy());
 266   else {
 267     if (ReturnAddrIndex == 0) {
 268       // Set up a frame object for the return address.
 269       MachineFunction &MF = DAG.getMachineFunction();
 270       ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
 271     }
 272
 273     SDOperand RetAddrFI = DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
 274
 275     if (!isFrameAddress)
 276       // Just load the return address
 277       Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI);
 278     else
 279       Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
 280                            DAG.getConstant(4, MVT::i32));
 281   }
 282   return std::make_pair(Result, Chain);
 283 }
 284
 285
 286
 287
 288
 289 namespace {
 290   Statistic<>
 291   NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
 292
 293   //===--------------------------------------------------------------------===//
 294   /// ISel - X86 specific code to select X86 machine instructions for
 295   /// SelectionDAG operations.
 296   ///
 297   class ISel : public SelectionDAGISel {
 298     /// ContainsFPCode - Every instruction we select that uses or defines a FP
 299     /// register should set this to true.
 300     bool ContainsFPCode;
 301
 302     /// X86Lowering - This object fully describes how to lower LLVM code to an
 303     /// X86-specific SelectionDAG.
 304     X86TargetLowering X86Lowering;
 305
 306     /// RegPressureMap - This keeps an approximate count of the number of
 307     /// registers required to evaluate each node in the graph.
 308     std::map<SDNode*, unsigned> RegPressureMap;
 309
 310     /// ExprMap - As shared expressions are codegen'd, we keep track of which
 311     /// vreg the value is produced in, so we only emit one copy of each compiled
 312     /// tree.
 313     std::map<SDOperand, unsigned> ExprMap;
 314     std::set<SDOperand> LoweredTokens;
 315
 316   public:
 317     ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), X86Lowering(TM) {
 318     }
 319
 320     unsigned getRegPressure(SDOperand O) {
 321       return RegPressureMap[O.Val];
 322     }
 323     unsigned ComputeRegPressure(SDOperand O);
 324
 325     /// InstructionSelectBasicBlock - This callback is invoked by
 326     /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
 327     virtual void InstructionSelectBasicBlock(SelectionDAG &DAG) {
 328       // While we're doing this, keep track of whether we see any FP code for
 329       // FP_REG_KILL insertion.
 330       ContainsFPCode = false;
 331
 332       // Compute the RegPressureMap, which is an approximation for the number of
 333       // registers required to compute each node.
 334       ComputeRegPressure(DAG.getRoot());
 335
 336       //DAG.viewGraph();
 337
 338       // Codegen the basic block.
 339       Select(DAG.getRoot());
 340
 341       // Insert FP_REG_KILL instructions into basic blocks that need them.  This
 342       // only occurs due to the floating point stackifier not being aggressive
 343       // enough to handle arbitrary global stackification.
 344       //
 345       // Currently we insert an FP_REG_KILL instruction into each block that
 346       // uses or defines a floating point virtual register.
 347       //
 348       // When the global register allocators (like linear scan) finally update
 349       // live variable analysis, we can keep floating point values in registers
 350       // across basic blocks.  This will be a huge win, but we are waiting on
 351       // the global allocators before we can do this.
 352       //
 353       if (ContainsFPCode && BB->succ_size()) {
 354         BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
 355         ++NumFPKill;
 356       }
 357
 358       // Clear state used for selection.
 359       ExprMap.clear();
 360       LoweredTokens.clear();
 361       RegPressureMap.clear();
 362     }
 363
 364     void EmitCMP(SDOperand LHS, SDOperand RHS);
 365     bool EmitBranchCC(MachineBasicBlock *Dest, SDOperand Cond);
 366     void EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
 367                       unsigned RTrue, unsigned RFalse, unsigned RDest);
 368     unsigned SelectExpr(SDOperand N);
 369     bool SelectAddress(SDOperand N, X86AddressMode &AM);
 370     void Select(SDOperand N);
 371   };
 372 }
 373
 374 // ComputeRegPressure - Compute the RegPressureMap, which is an approximation
 375 // for the number of registers required to compute each node.  This is basically
 376 // computing a generalized form of the Sethi-Ullman number for each node.
 377 unsigned ISel::ComputeRegPressure(SDOperand O) {
 378   SDNode *N = O.Val;
 379   unsigned &Result = RegPressureMap[N];
 380   if (Result) return Result;
 381
 382   // FIXME: Should operations like CALL (which clobber lots o regs) have a
 383   // higher fixed cost??
 384
 385   if (N->getNumOperands() == 0)
 386     return Result = 1;
 387
 388   unsigned MaxRegUse = 0;
 389   unsigned NumExtraMaxRegUsers = 0;
 390   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
 391     unsigned Regs = ComputeRegPressure(N->getOperand(i));
 392     if (Regs > MaxRegUse) {
 393       MaxRegUse = Regs;
 394       NumExtraMaxRegUsers = 0;
 395     } else if (Regs == MaxRegUse) {
 396       ++NumExtraMaxRegUsers;
 397     }
 398   }
 399
 400   return Result = MaxRegUse+NumExtraMaxRegUsers;
 401 }
 402
 403 /// SelectAddress - Add the specified node to the specified addressing mode,
 404 /// returning true if it cannot be done.
 405 bool ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
 406   switch (N.getOpcode()) {
 407   default: break;
 408   case ISD::FrameIndex:
 409     if (AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg == 0) {
 410       AM.BaseType = X86AddressMode::FrameIndexBase;
 411       AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
 412       return false;
 413     }
 414     break;
 415   case ISD::GlobalAddress:
 416     if (AM.GV == 0) {
 417       AM.GV = cast<GlobalAddressSDNode>(N)->getGlobal();
 418       return false;
 419     }
 420     break;
 421   case ISD::Constant:
 422     AM.Disp += cast<ConstantSDNode>(N)->getValue();
 423     return false;
 424   case ISD::SHL:
 425     if (AM.IndexReg == 0 || AM.Scale == 1)
 426       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
 427         unsigned Val = CN->getValue();
 428         if (Val == 1 || Val == 2 || Val == 3) {
 429           AM.Scale = 1 << Val;
 430           AM.IndexReg = SelectExpr(N.Val->getOperand(0));
 431           return false;
 432         }
 433       }
 434     break;
 435
 436   case ISD::ADD: {
 437     X86AddressMode Backup = AM;
 438     if (!SelectAddress(N.Val->getOperand(0), AM) &&
 439         !SelectAddress(N.Val->getOperand(1), AM))
 440       return false;
 441     AM = Backup;
 442     break;
 443   }
 444   }
 445
 446   if (AM.BaseType != X86AddressMode::RegBase ||
 447       AM.Base.Reg)
 448     return true;
 449
 450   // Default, generate it as a register.
 451   AM.BaseType = X86AddressMode::RegBase;
 452   AM.Base.Reg = SelectExpr(N);
 453   return false;
 454 }
 455
 456 /// Emit2SetCCsAndLogical - Emit the following sequence of instructions,
 457 /// assuming that the temporary registers are in the 8-bit register class.
 458 ///
 459 ///  Tmp1 = setcc1
 460 ///  Tmp2 = setcc2
 461 ///  DestReg = logicalop Tmp1, Tmp2
 462 ///
 463 static void Emit2SetCCsAndLogical(MachineBasicBlock *BB, unsigned SetCC1,
 464                                   unsigned SetCC2, unsigned LogicalOp,
 465                                   unsigned DestReg) {
 466   SSARegMap *RegMap = BB->getParent()->getSSARegMap();
 467   unsigned Tmp1 = RegMap->createVirtualRegister(X86::R8RegisterClass);
 468   unsigned Tmp2 = RegMap->createVirtualRegister(X86::R8RegisterClass);
 469   BuildMI(BB, SetCC1, 0, Tmp1);
 470   BuildMI(BB, SetCC2, 0, Tmp2);
 471   BuildMI(BB, LogicalOp, 2, DestReg).addReg(Tmp1).addReg(Tmp2);
 472 }
 473
 474 /// EmitSetCC - Emit the code to set the specified 8-bit register to 1 if the
 475 /// condition codes match the specified SetCCOpcode.  Note that some conditions
 476 /// require multiple instructions to generate the correct value.
 477 static void EmitSetCC(MachineBasicBlock *BB, unsigned DestReg,
 478                       ISD::CondCode SetCCOpcode, bool isFP) {
 479   unsigned Opc;
 480   if (!isFP) {
 481     switch (SetCCOpcode) {
 482     default: assert(0 && "Illegal integer SetCC!");
 483     case ISD::SETEQ: Opc = X86::SETEr; break;
 484     case ISD::SETGT: Opc = X86::SETGr; break;
 485     case ISD::SETGE: Opc = X86::SETGEr; break;
 486     case ISD::SETLT: Opc = X86::SETLr; break;
 487     case ISD::SETLE: Opc = X86::SETLEr; break;
 488     case ISD::SETNE: Opc = X86::SETNEr; break;
 489     case ISD::SETULT: Opc = X86::SETBr; break;
 490     case ISD::SETUGT: Opc = X86::SETAr; break;
 491     case ISD::SETULE: Opc = X86::SETBEr; break;
 492     case ISD::SETUGE: Opc = X86::SETAEr; break;
 493     }
 494   } else {
 495     // On a floating point condition, the flags are set as follows:
 496     // ZF  PF  CF   op
 497     //  0 | 0 | 0 | X > Y
 498     //  0 | 0 | 1 | X < Y
 499     //  1 | 0 | 0 | X == Y
 500     //  1 | 1 | 1 | unordered
 501     //
 502     switch (SetCCOpcode) {
 503     default: assert(0 && "Invalid FP setcc!");
 504     case ISD::SETUEQ:
 505     case ISD::SETEQ:
 506       Opc = X86::SETEr;    // True if ZF = 1
 507       break;
 508     case ISD::SETOGT:
 509     case ISD::SETGT:
 510       Opc = X86::SETAr;    // True if CF = 0 and ZF = 0
 511       break;
 512     case ISD::SETOGE:
 513     case ISD::SETGE:
 514       Opc = X86::SETAEr;   // True if CF = 0
 515       break;
 516     case ISD::SETULT:
 517     case ISD::SETLT:
 518       Opc = X86::SETBr;    // True if CF = 1
 519       break;
 520     case ISD::SETULE:
 521     case ISD::SETLE:
 522       Opc = X86::SETBEr;   // True if CF = 1 or ZF = 1
 523       break;
 524     case ISD::SETONE:
 525     case ISD::SETNE:
 526       Opc = X86::SETNEr;   // True if ZF = 0
 527       break;
 528     case ISD::SETUO:
 529       Opc = X86::SETPr;    // True if PF = 1
 530       break;
 531     case ISD::SETO:
 532       Opc = X86::SETNPr;   // True if PF = 0
 533       break;
 534     case ISD::SETOEQ:      // !PF & ZF
 535       Emit2SetCCsAndLogical(BB, X86::SETNPr, X86::SETEr, X86::AND8rr, DestReg);
 536       return;
 537     case ISD::SETOLT:      // !PF & CF
 538       Emit2SetCCsAndLogical(BB, X86::SETNPr, X86::SETBr, X86::AND8rr, DestReg);
 539       return;
 540     case ISD::SETOLE:      // !PF & (CF || ZF)
 541       Emit2SetCCsAndLogical(BB, X86::SETNPr, X86::SETBEr, X86::AND8rr, DestReg);
 542       return;
 543     case ISD::SETUGT:      // PF | (!ZF & !CF)
 544       Emit2SetCCsAndLogical(BB, X86::SETPr, X86::SETAr, X86::OR8rr, DestReg);
 545       return;
 546     case ISD::SETUGE:      // PF | !CF
 547       Emit2SetCCsAndLogical(BB, X86::SETPr, X86::SETAEr, X86::OR8rr, DestReg);
 548       return;
 549     case ISD::SETUNE:      // PF | !ZF
 550       Emit2SetCCsAndLogical(BB, X86::SETPr, X86::SETNEr, X86::OR8rr, DestReg);
 551       return;
 552     }
 553   }
 554   BuildMI(BB, Opc, 0, DestReg);
 555 }
 556
 557
 558 /// EmitBranchCC - Emit code into BB that arranges for control to transfer to
 559 /// the Dest block if the Cond condition is true.  If we cannot fold this
 560 /// condition into the branch, return true.
 561 ///
 562 bool ISel::EmitBranchCC(MachineBasicBlock *Dest, SDOperand Cond) {
 563   // FIXME: Evaluate whether it would be good to emit code like (X < Y) | (A >
 564   // B) using two conditional branches instead of one condbr, two setcc's, and
 565   // an or.
 566   if ((Cond.getOpcode() == ISD::OR ||
 567        Cond.getOpcode() == ISD::AND) && Cond.Val->hasOneUse()) {
 568     // And and or set the flags for us, so there is no need to emit a TST of the
 569     // result.  It is only safe to do this if there is only a single use of the
 570     // AND/OR though, otherwise we don't know it will be emitted here.
 571     SelectExpr(Cond);
 572     BuildMI(BB, X86::JNE, 1).addMBB(Dest);
 573     return false;
 574   }
 575
 576   // Codegen br not C -> JE.
 577   if (Cond.getOpcode() == ISD::XOR)
 578     if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Cond.Val->getOperand(1)))
 579       if (NC->isAllOnesValue()) {
 580         unsigned CondR = SelectExpr(Cond.Val->getOperand(0));
 581         BuildMI(BB, X86::TEST8rr, 2).addReg(CondR).addReg(CondR);
 582         BuildMI(BB, X86::JE, 1).addMBB(Dest);
 583         return false;
 584       }
 585
 586   SetCCSDNode *SetCC = dyn_cast<SetCCSDNode>(Cond);
 587   if (SetCC == 0)
 588     return true;                       // Can only handle simple setcc's so far.
 589
 590   unsigned Opc;
 591
 592   // Handle integer conditions first.
 593   if (MVT::isInteger(SetCC->getOperand(0).getValueType())) {
 594     switch (SetCC->getCondition()) {
 595     default: assert(0 && "Illegal integer SetCC!");
 596     case ISD::SETEQ: Opc = X86::JE; break;
 597     case ISD::SETGT: Opc = X86::JG; break;
 598     case ISD::SETGE: Opc = X86::JGE; break;
 599     case ISD::SETLT: Opc = X86::JL; break;
 600     case ISD::SETLE: Opc = X86::JLE; break;
 601     case ISD::SETNE: Opc = X86::JNE; break;
 602     case ISD::SETULT: Opc = X86::JB; break;
 603     case ISD::SETUGT: Opc = X86::JA; break;
 604     case ISD::SETULE: Opc = X86::JBE; break;
 605     case ISD::SETUGE: Opc = X86::JAE; break;
 606     }
 607     EmitCMP(SetCC->getOperand(0), SetCC->getOperand(1));
 608     BuildMI(BB, Opc, 1).addMBB(Dest);
 609     return false;
 610   }
 611
 612   unsigned Opc2 = 0;  // Second branch if needed.
 613
 614   // On a floating point condition, the flags are set as follows:
 615   // ZF  PF  CF   op
 616   //  0 | 0 | 0 | X > Y
 617   //  0 | 0 | 1 | X < Y
 618   //  1 | 0 | 0 | X == Y
 619   //  1 | 1 | 1 | unordered
 620   //
 621   switch (SetCC->getCondition()) {
 622   default: assert(0 && "Invalid FP setcc!");
 623   case ISD::SETUEQ:
 624   case ISD::SETEQ:   Opc = X86::JE;  break;     // True if ZF = 1
 625   case ISD::SETOGT:
 626   case ISD::SETGT:   Opc = X86::JA;  break;     // True if CF = 0 and ZF = 0
 627   case ISD::SETOGE:
 628   case ISD::SETGE:   Opc = X86::JAE; break;     // True if CF = 0
 629   case ISD::SETULT:
 630   case ISD::SETLT:   Opc = X86::JB;  break;     // True if CF = 1
 631   case ISD::SETULE:
 632   case ISD::SETLE:   Opc = X86::JBE; break;     // True if CF = 1 or ZF = 1
 633   case ISD::SETONE:
 634   case ISD::SETNE:   Opc = X86::JNE; break;     // True if ZF = 0
 635   case ISD::SETUO:   Opc = X86::JP;  break;     // True if PF = 1
 636   case ISD::SETO:    Opc = X86::JNP; break;     // True if PF = 0
 637   case ISD::SETUGT:      // PF = 1 | (ZF = 0 & CF = 0)
 638     Opc = X86::JA;       // ZF = 0 & CF = 0
 639     Opc2 = X86::JP;      // PF = 1
 640     break;
 641   case ISD::SETUGE:      // PF = 1 | CF = 0
 642     Opc = X86::JAE;      // CF = 0
 643     Opc2 = X86::JP;      // PF = 1
 644     break;
 645   case ISD::SETUNE:      // PF = 1 | ZF = 0
 646     Opc = X86::JNE;      // ZF = 0
 647     Opc2 = X86::JP;      // PF = 1
 648     break;
 649   case ISD::SETOEQ:      // PF = 0 & ZF = 1
 650     //X86::JNP, X86::JE
 651     //X86::AND8rr
 652     return true;    // FIXME: Emit more efficient code for this branch.
 653   case ISD::SETOLT:      // PF = 0 & CF = 1
 654     //X86::JNP, X86::JB
 655     //X86::AND8rr
 656     return true;    // FIXME: Emit more efficient code for this branch.
 657   case ISD::SETOLE:      // PF = 0 & (CF = 1 || ZF = 1)
 658     //X86::JNP, X86::JBE
 659     //X86::AND8rr
 660     return true;    // FIXME: Emit more efficient code for this branch.
 661   }
 662
 663   EmitCMP(SetCC->getOperand(0), SetCC->getOperand(1));
 664   BuildMI(BB, Opc, 1).addMBB(Dest);
 665   if (Opc2)
 666     BuildMI(BB, Opc2, 1).addMBB(Dest);
 667   return false;
 668 }
 669
 670 /// EmitSelectCC - Emit code into BB that performs a select operation between
 671 /// the two registers RTrue and RFalse, generating a result into RDest.  Return
 672 /// true if the fold cannot be performed.
 673 ///
 674 void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
 675                         unsigned RTrue, unsigned RFalse, unsigned RDest) {
 676   enum Condition {
 677     EQ, NE, LT, LE, GT, GE, B, BE, A, AE, P, NP,
 678     NOT_SET
 679   } CondCode = NOT_SET;
 680
 681   static const unsigned CMOVTAB16[] = {
 682     X86::CMOVE16rr,  X86::CMOVNE16rr, X86::CMOVL16rr,  X86::CMOVLE16rr,
 683     X86::CMOVG16rr,  X86::CMOVGE16rr, X86::CMOVB16rr,  X86::CMOVBE16rr,
 684     X86::CMOVA16rr,  X86::CMOVAE16rr, X86::CMOVP16rr,  X86::CMOVNP16rr,
 685   };
 686   static const unsigned CMOVTAB32[] = {
 687     X86::CMOVE32rr,  X86::CMOVNE32rr, X86::CMOVL32rr,  X86::CMOVLE32rr,
 688     X86::CMOVG32rr,  X86::CMOVGE32rr, X86::CMOVB32rr,  X86::CMOVBE32rr,
 689     X86::CMOVA32rr,  X86::CMOVAE32rr, X86::CMOVP32rr,  X86::CMOVNP32rr,
 690   };
 691   static const unsigned CMOVTABFP[] = {
 692     X86::FCMOVE ,  X86::FCMOVNE, /*missing*/0, /*missing*/0,
 693     /*missing*/0,  /*missing*/0, X86::FCMOVB , X86::FCMOVBE,
 694     X86::FCMOVA ,  X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP
 695   };
 696
 697   if (SetCCSDNode *SetCC = dyn_cast<SetCCSDNode>(Cond)) {
 698     if (MVT::isInteger(SetCC->getOperand(0).getValueType())) {
 699       switch (SetCC->getCondition()) {
 700       default: assert(0 && "Unknown integer comparison!");
 701       case ISD::SETEQ:  CondCode = EQ; break;
 702       case ISD::SETGT:  CondCode = GT; break;
 703       case ISD::SETGE:  CondCode = GE; break;
 704       case ISD::SETLT:  CondCode = LT; break;
 705       case ISD::SETLE:  CondCode = LE; break;
 706       case ISD::SETNE:  CondCode = NE; break;
 707       case ISD::SETULT: CondCode = B; break;
 708       case ISD::SETUGT: CondCode = A; break;
 709       case ISD::SETULE: CondCode = BE; break;
 710       case ISD::SETUGE: CondCode = AE; break;
 711       }
 712     } else {
 713       // On a floating point condition, the flags are set as follows:
 714       // ZF  PF  CF   op
 715       //  0 | 0 | 0 | X > Y
 716       //  0 | 0 | 1 | X < Y
 717       //  1 | 0 | 0 | X == Y
 718       //  1 | 1 | 1 | unordered
 719       //
 720       switch (SetCC->getCondition()) {
 721       default: assert(0 && "Unknown FP comparison!");
 722       case ISD::SETUEQ:
 723       case ISD::SETEQ:  CondCode = EQ; break;     // True if ZF = 1
 724       case ISD::SETOGT:
 725       case ISD::SETGT:  CondCode = A;  break;     // True if CF = 0 and ZF = 0
 726       case ISD::SETOGE:
 727       case ISD::SETGE:  CondCode = AE; break;     // True if CF = 0
 728       case ISD::SETULT:
 729       case ISD::SETLT:  CondCode = B;  break;     // True if CF = 1
 730       case ISD::SETULE:
 731       case ISD::SETLE:  CondCode = BE; break;     // True if CF = 1 or ZF = 1
 732       case ISD::SETONE:
 733       case ISD::SETNE:  CondCode = NE; break;     // True if ZF = 0
 734       case ISD::SETUO:  CondCode = P;  break;     // True if PF = 1
 735       case ISD::SETO:   CondCode = NP; break;     // True if PF = 0
 736       case ISD::SETUGT:      // PF = 1 | (ZF = 0 & CF = 0)
 737       case ISD::SETUGE:      // PF = 1 | CF = 0
 738       case ISD::SETUNE:      // PF = 1 | ZF = 0
 739       case ISD::SETOEQ:      // PF = 0 & ZF = 1
 740       case ISD::SETOLT:      // PF = 0 & CF = 1
 741       case ISD::SETOLE:      // PF = 0 & (CF = 1 || ZF = 1)
 742         // We cannot emit this comparison as a single cmov.
 743         break;
 744       }
 745     }
 746   }
 747
 748   unsigned Opc = 0;
 749   if (CondCode != NOT_SET) {
 750     switch (SVT) {
 751     default: assert(0 && "Cannot select this type!");
 752     case MVT::i16: Opc = CMOVTAB16[CondCode]; break;
 753     case MVT::i32: Opc = CMOVTAB32[CondCode]; break;
 754     case MVT::f32:
 755     case MVT::f64: Opc = CMOVTABFP[CondCode]; break;
 756     }
 757   }
 758
 759   // Finally, if we weren't able to fold this, just emit the condition and test
 760   // it.
 761   if (CondCode == NOT_SET || Opc == 0) {
 762     // Get the condition into the zero flag.
 763     unsigned CondReg = SelectExpr(Cond);
 764     BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
 765
 766     switch (SVT) {
 767     default: assert(0 && "Cannot select this type!");
 768     case MVT::i16: Opc = X86::CMOVE16rr; break;
 769     case MVT::i32: Opc = X86::CMOVE32rr; break;
 770     case MVT::f32:
 771     case MVT::f64: Opc = X86::FCMOVE; break;
 772     }
 773   } else {
 774     // FIXME: CMP R, 0 -> TEST R, R
 775     EmitCMP(Cond.getOperand(0), Cond.getOperand(1));
 776     std::swap(RTrue, RFalse);
 777   }
 778   BuildMI(BB, Opc, 2, RDest).addReg(RTrue).addReg(RFalse);
 779 }
 780
 781 void ISel::EmitCMP(SDOperand LHS, SDOperand RHS) {
 782   unsigned Opc;
 783   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
 784     Opc = 0;
 785     switch (RHS.getValueType()) {
 786     default: break;
 787     case MVT::i1:
 788     case MVT::i8:  Opc = X86::CMP8ri;  break;
 789     case MVT::i16: Opc = X86::CMP16ri; break;
 790     case MVT::i32: Opc = X86::CMP32ri; break;
 791     }
 792     if (Opc) {
 793       unsigned Tmp1 = SelectExpr(LHS);
 794       BuildMI(BB, Opc, 2).addReg(Tmp1).addImm(CN->getValue());
 795       return;
 796     }
 797   }
 798
 799   switch (LHS.getValueType()) {
 800   default: assert(0 && "Cannot compare this value!");
 801   case MVT::i1:
 802   case MVT::i8:  Opc = X86::CMP8rr;  break;
 803   case MVT::i16: Opc = X86::CMP16rr; break;
 804   case MVT::i32: Opc = X86::CMP32rr; break;
 805   case MVT::f32:
 806   case MVT::f64: Opc = X86::FUCOMIr; break;
 807   }
 808   unsigned Tmp1, Tmp2;
 809   if (getRegPressure(LHS) > getRegPressure(RHS)) {
 810     Tmp1 = SelectExpr(LHS);
 811     Tmp2 = SelectExpr(RHS);
 812   } else {
 813     Tmp2 = SelectExpr(RHS);
 814     Tmp1 = SelectExpr(LHS);
 815   }
 816   BuildMI(BB, Opc, 2).addReg(Tmp1).addReg(Tmp2);
 817 }
 818
 819 unsigned ISel::SelectExpr(SDOperand N) {
 820   unsigned Result;
 821   unsigned Tmp1, Tmp2, Tmp3;
 822   unsigned Opc = 0;
 823
 824   SDNode *Node = N.Val;
 825
 826   if (Node->getOpcode() == ISD::CopyFromReg)
 827     // Just use the specified register as our input.
 828     return dyn_cast<CopyRegSDNode>(Node)->getReg();
 829
 830   // If there are multiple uses of this expression, memorize the
 831   // register it is code generated in, instead of emitting it multiple
 832   // times.
 833   // FIXME: Disabled for our current selection model.
 834   if (1 || !Node->hasOneUse()) {
 835     unsigned &Reg = ExprMap[N];
 836     if (Reg) return Reg;
 837
 838     if (N.getOpcode() != ISD::CALL)
 839       Reg = Result = (N.getValueType() != MVT::Other) ?
 840                          MakeReg(N.getValueType()) : 1;
 841     else {
 842       // If this is a call instruction, make sure to prepare ALL of the result
 843       // values as well as the chain.
 844       if (Node->getNumValues() == 1)
 845         Reg = Result = 1;  // Void call, just a chain.
 846       else {
 847         Result = MakeReg(Node->getValueType(0));
 848         ExprMap[N.getValue(0)] = Result;
 849         for (unsigned i = 1, e = N.Val->getNumValues()-1; i != e; ++i)
 850           ExprMap[N.getValue(i)] = MakeReg(Node->getValueType(i));
 851         ExprMap[SDOperand(Node, Node->getNumValues()-1)] = 1;
 852       }
 853     }
 854   } else {
 855     Result = MakeReg(N.getValueType());
 856   }
 857
 858   switch (N.getOpcode()) {
 859   default:
 860     Node->dump();
 861     assert(0 && "Node not handled!\n");
 862   case ISD::FrameIndex:
 863     Tmp1 = cast<FrameIndexSDNode>(N)->getIndex();
 864     addFrameReference(BuildMI(BB, X86::LEA32r, 4, Result), (int)Tmp1);
 865     return Result;
 866   case ISD::ConstantPool:
 867     Tmp1 = cast<ConstantPoolSDNode>(N)->getIndex();
 868     addConstantPoolReference(BuildMI(BB, X86::LEA32r, 4, Result), Tmp1);
 869     return Result;
 870   case ISD::ConstantFP:
 871     ContainsFPCode = true;
 872     Tmp1 = Result;   // Intermediate Register
 873     if (cast<ConstantFPSDNode>(N)->getValue() < 0.0 ||
 874         cast<ConstantFPSDNode>(N)->isExactlyValue(-0.0))
 875       Tmp1 = MakeReg(MVT::f64);
 876
 877     if (cast<ConstantFPSDNode>(N)->isExactlyValue(+0.0) ||
 878         cast<ConstantFPSDNode>(N)->isExactlyValue(-0.0))
 879       BuildMI(BB, X86::FLD0, 0, Tmp1);
 880     else if (cast<ConstantFPSDNode>(N)->isExactlyValue(+1.0) ||
 881              cast<ConstantFPSDNode>(N)->isExactlyValue(-1.0))
 882       BuildMI(BB, X86::FLD1, 0, Tmp1);
 883     else
 884       assert(0 && "Unexpected constant!");
 885     if (Tmp1 != Result)
 886       BuildMI(BB, X86::FCHS, 1, Result).addReg(Tmp1);
 887     return Result;
 888   case ISD::Constant:
 889     switch (N.getValueType()) {
 890     default: assert(0 && "Cannot use constants of this type!");
 891     case MVT::i1:
 892     case MVT::i8:  Opc = X86::MOV8ri;  break;
 893     case MVT::i16: Opc = X86::MOV16ri; break;
 894     case MVT::i32: Opc = X86::MOV32ri; break;
 895     }
 896     BuildMI(BB, Opc, 1,Result).addImm(cast<ConstantSDNode>(N)->getValue());
 897     return Result;
 898   case ISD::GlobalAddress: {
 899     GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
 900     BuildMI(BB, X86::MOV32ri, 1, Result).addGlobalAddress(GV);
 901     return Result;
 902   }
 903   case ISD::ExternalSymbol: {
 904     const char *Sym = cast<ExternalSymbolSDNode>(N)->getSymbol();
 905     BuildMI(BB, X86::MOV32ri, 1, Result).addExternalSymbol(Sym);
 906     return Result;
 907   }
 908   case ISD::FP_EXTEND:
 909     Tmp1 = SelectExpr(N.getOperand(0));
 910     BuildMI(BB, X86::FpMOV, 1, Result).addReg(Tmp1);
 911     return Result;
 912   case ISD::ZERO_EXTEND: {
 913     int DestIs16 = N.getValueType() == MVT::i16;
 914     int SrcIs16  = N.getOperand(0).getValueType() == MVT::i16;
 915     Tmp1 = SelectExpr(N.getOperand(0));
 916
 917     // FIXME: This hack is here for zero extension casts from bool to i8.  This
 918     // would not be needed if bools were promoted by Legalize.
 919     if (N.getValueType() == MVT::i8) {
 920       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(Tmp1);
 921       return Result;
 922     }
 923
 924     static const unsigned Opc[3] = {
 925       X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOVZX16rr8
 926     };
 927     BuildMI(BB, Opc[SrcIs16+DestIs16*2], 1, Result).addReg(Tmp1);
 928     return Result;
 929   }
 930   case ISD::SIGN_EXTEND: {
 931     int DestIs16 = N.getValueType() == MVT::i16;
 932     int SrcIs16  = N.getOperand(0).getValueType() == MVT::i16;
 933
 934     // FIXME: Legalize should promote bools to i8!
 935     assert(N.getOperand(0).getValueType() != MVT::i1 &&
 936            "Sign extend from bool not implemented!");
 937
 938     static const unsigned Opc[3] = {
 939       X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOVSX16rr8
 940     };
 941     Tmp1 = SelectExpr(N.getOperand(0));
 942     BuildMI(BB, Opc[SrcIs16+DestIs16*2], 1, Result).addReg(Tmp1);
 943     return Result;
 944   }
 945   case ISD::TRUNCATE:
 946     // Handle cast of LARGER int to SMALLER int using a move to EAX followed by
 947     // a move out of AX or AL.
 948     switch (N.getOperand(0).getValueType()) {
 949     default: assert(0 && "Unknown truncate!");
 950     case MVT::i8:  Tmp2 = X86::AL;  Opc = X86::MOV8rr;  break;
 951     case MVT::i16: Tmp2 = X86::AX;  Opc = X86::MOV16rr; break;
 952     case MVT::i32: Tmp2 = X86::EAX; Opc = X86::MOV32rr; break;
 953     }
 954     Tmp1 = SelectExpr(N.getOperand(0));
 955     BuildMI(BB, Opc, 1, Tmp2).addReg(Tmp1);
 956
 957     switch (N.getValueType()) {
 958     default: assert(0 && "Unknown truncate!");
 959     case MVT::i1:
 960     case MVT::i8:  Tmp2 = X86::AL;  Opc = X86::MOV8rr;  break;
 961     case MVT::i16: Tmp2 = X86::AX;  Opc = X86::MOV16rr; break;
 962     }
 963     BuildMI(BB, Opc, 1, Result).addReg(Tmp2);
 964     return Result;
 965
 966   case ISD::FP_ROUND:
 967     // Truncate from double to float by storing to memory as float,
 968     // then reading it back into a register.
 969
 970     // Create as stack slot to use.
 971     // FIXME: This should automatically be made by the Legalizer!
 972     Tmp1 = TLI.getTargetData().getFloatAlignment();
 973     Tmp2 = BB->getParent()->getFrameInfo()->CreateStackObject(4, Tmp1);
 974
 975     // Codegen the input.
 976     Tmp1 = SelectExpr(N.getOperand(0));
 977
 978     // Emit the store, then the reload.
 979     addFrameReference(BuildMI(BB, X86::FST32m, 5), Tmp2).addReg(Tmp1);
 980     addFrameReference(BuildMI(BB, X86::FLD32m, 5, Result), Tmp2);
 981     return Result;
 982
 983   case ISD::SINT_TO_FP:
 984   case ISD::UINT_TO_FP: {
 985     // FIXME: Most of this grunt work should be done by legalize!
 986     ContainsFPCode = true;
 987
 988     // Promote the integer to a type supported by FLD.  We do this because there
 989     // are no unsigned FLD instructions, so we must promote an unsigned value to
 990     // a larger signed value, then use FLD on the larger value.
 991     //
 992     MVT::ValueType PromoteType = MVT::Other;
 993     MVT::ValueType SrcTy = N.getOperand(0).getValueType();
 994     unsigned PromoteOpcode = 0;
 995     unsigned RealDestReg = Result;
 996     switch (SrcTy) {
 997     case MVT::i1:
 998     case MVT::i8:
 999       // We don't have the facilities for directly loading byte sized data from
1000       // memory (even signed).  Promote it to 16 bits.
1001       PromoteType = MVT::i16;
1002       PromoteOpcode = Node->getOpcode() == ISD::SINT_TO_FP ?
1003         X86::MOVSX16rr8 : X86::MOVZX16rr8;
1004       break;
1005     case MVT::i16:
1006       if (Node->getOpcode() == ISD::UINT_TO_FP) {
1007         PromoteType = MVT::i32;
1008         PromoteOpcode = X86::MOVZX32rr16;
1009       }
1010       break;
1011     default:
1012       // Don't fild into the real destination.
1013       if (Node->getOpcode() == ISD::UINT_TO_FP)
1014         Result = MakeReg(Node->getValueType(0));
1015       break;
1016     }
1017
1018     Tmp1 = SelectExpr(N.getOperand(0));  // Get the operand register
1019
1020     if (PromoteType != MVT::Other) {
1021       Tmp2 = MakeReg(PromoteType);
1022       BuildMI(BB, PromoteOpcode, 1, Tmp2).addReg(Tmp1);
1023       SrcTy = PromoteType;
1024       Tmp1 = Tmp2;
1025     }
1026
1027     // Spill the integer to memory and reload it from there.
1028     unsigned Size = MVT::getSizeInBits(SrcTy)/8;
1029     MachineFunction *F = BB->getParent();
1030     int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
1031
1032     switch (SrcTy) {
1033     case MVT::i64:
1034       // FIXME: this won't work for cast [u]long to FP
1035       addFrameReference(BuildMI(BB, X86::MOV32mr, 5),
1036                         FrameIdx).addReg(Tmp1);
1037       addFrameReference(BuildMI(BB, X86::MOV32mr, 5),
1038                         FrameIdx, 4).addReg(Tmp1+1);
1039       addFrameReference(BuildMI(BB, X86::FILD64m, 5, Result), FrameIdx);
1040       break;
1041     case MVT::i32:
1042       addFrameReference(BuildMI(BB, X86::MOV32mr, 5),
1043                         FrameIdx).addReg(Tmp1);
1044       addFrameReference(BuildMI(BB, X86::FILD32m, 5, Result), FrameIdx);
1045       break;
1046     case MVT::i16:
1047       addFrameReference(BuildMI(BB, X86::MOV16mr, 5),
1048                         FrameIdx).addReg(Tmp1);
1049       addFrameReference(BuildMI(BB, X86::FILD16m, 5, Result), FrameIdx);
1050       break;
1051     default: break; // No promotion required.
1052     }
1053
1054     if (Node->getOpcode() == ISD::UINT_TO_FP && SrcTy == MVT::i32) {
1055       // If this is a cast from uint -> double, we need to be careful when if
1056       // the "sign" bit is set.  If so, we don't want to make a negative number,
1057       // we want to make a positive number.  Emit code to add an offset if the
1058       // sign bit is set.
1059
1060       // Compute whether the sign bit is set by shifting the reg right 31 bits.
1061       unsigned IsNeg = MakeReg(MVT::i32);
1062       BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(Tmp1).addImm(31);
1063
1064       // Create a CP value that has the offset in one word and 0 in the other.
1065       static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
1066                                                         0x4f80000000000000ULL);
1067       unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
1068       BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(Result)
1069         .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
1070
1071     } else if (Node->getOpcode() == ISD::UINT_TO_FP && SrcTy == MVT::i64) {
1072       // We need special handling for unsigned 64-bit integer sources.  If the
1073       // input number has the "sign bit" set, then we loaded it incorrectly as a
1074       // negative 64-bit number.  In this case, add an offset value.
1075
1076       // Emit a test instruction to see if the dynamic input value was signed.
1077       BuildMI(BB, X86::TEST32rr, 2).addReg(Tmp1+1).addReg(Tmp1+1);
1078
1079       // If the sign bit is set, get a pointer to an offset, otherwise get a
1080       // pointer to a zero.
1081       MachineConstantPool *CP = F->getConstantPool();
1082       unsigned Zero = MakeReg(MVT::i32);
1083       Constant *Null = Constant::getNullValue(Type::UIntTy);
1084       addConstantPoolReference(BuildMI(BB, X86::LEA32r, 5, Zero),
1085                                CP->getConstantPoolIndex(Null));
1086       unsigned Offset = MakeReg(MVT::i32);
1087       Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
1088
1089       addConstantPoolReference(BuildMI(BB, X86::LEA32r, 5, Offset),
1090                                CP->getConstantPoolIndex(OffsetCst));
1091       unsigned Addr = MakeReg(MVT::i32);
1092       BuildMI(BB, X86::CMOVS32rr, 2, Addr).addReg(Zero).addReg(Offset);
1093
1094       // Load the constant for an add.  FIXME: this could make an 'fadd' that
1095       // reads directly from memory, but we don't support these yet.
1096       unsigned ConstReg = MakeReg(MVT::f64);
1097       addDirectMem(BuildMI(BB, X86::FLD32m, 4, ConstReg), Addr);
1098
1099       BuildMI(BB, X86::FpADD, 2, RealDestReg).addReg(ConstReg).addReg(Result);
1100     }
1101     return RealDestReg;
1102   }
1103   case ISD::FP_TO_SINT:
1104   case ISD::FP_TO_UINT: {
1105     // FIXME: Most of this grunt work should be done by legalize!
1106     Tmp1 = SelectExpr(N.getOperand(0));  // Get the operand register
1107
1108     // Change the floating point control register to use "round towards zero"
1109     // mode when truncating to an integer value.
1110     //
1111     MachineFunction *F = BB->getParent();
1112     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
1113     addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
1114
1115     // Load the old value of the high byte of the control word...
1116     unsigned HighPartOfCW = MakeReg(MVT::i8);
1117     addFrameReference(BuildMI(BB, X86::MOV8rm, 4, HighPartOfCW),
1118                       CWFrameIdx, 1);
1119
1120     // Set the high part to be round to zero...
1121     addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
1122                       CWFrameIdx, 1).addImm(12);
1123
1124     // Reload the modified control word now...
1125     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1126
1127     // Restore the memory image of control word to original value
1128     addFrameReference(BuildMI(BB, X86::MOV8mr, 5),
1129                       CWFrameIdx, 1).addReg(HighPartOfCW);
1130
1131     // We don't have the facilities for directly storing byte sized data to
1132     // memory.  Promote it to 16 bits.  We also must promote unsigned values to
1133     // larger classes because we only have signed FP stores.
1134     MVT::ValueType StoreClass = Node->getValueType(0);
1135     if (StoreClass == MVT::i8 || Node->getOpcode() == ISD::FP_TO_UINT)
1136       switch (StoreClass) {
1137       case MVT::i8:  StoreClass = MVT::i16; break;
1138       case MVT::i16: StoreClass = MVT::i32; break;
1139       case MVT::i32: StoreClass = MVT::i64; break;
1140         // The following treatment of cLong may not be perfectly right,
1141         // but it survives chains of casts of the form
1142         // double->ulong->double.
1143       case MVT::i64:  StoreClass = MVT::i64;  break;
1144       default: assert(0 && "Unknown store class!");
1145       }
1146
1147     // Spill the integer to memory and reload it from there.
1148     unsigned Size = MVT::getSizeInBits(StoreClass)/8;
1149     int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
1150
1151     switch (StoreClass) {
1152     default: assert(0 && "Unknown store class!");
1153     case MVT::i16:
1154       addFrameReference(BuildMI(BB, X86::FIST16m, 5), FrameIdx).addReg(Tmp1);
1155       break;
1156     case MVT::i32:
1157       addFrameReference(BuildMI(BB, X86::FIST32m, 5), FrameIdx).addReg(Tmp1);
1158       break;
1159     case MVT::i64:
1160       addFrameReference(BuildMI(BB, X86::FISTP64m, 5), FrameIdx).addReg(Tmp1);
1161       break;
1162     }
1163
1164     switch (Node->getValueType(0)) {
1165     default:
1166       assert(0 && "Unknown integer type!");
1167     case MVT::i64:
1168       // FIXME: this isn't gunna work.
1169       addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Result), FrameIdx);
1170       addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Result+1), FrameIdx, 4);
1171     case MVT::i32:
1172       addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Result), FrameIdx);
1173       break;
1174     case MVT::i16:
1175       addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Result), FrameIdx);
1176       break;
1177     case MVT::i8:
1178       addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Result), FrameIdx);
1179       break;
1180     }
1181
1182     // Reload the original control word now.
1183     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1184     return Result;
1185   }
1186   case ISD::ADD:
1187     // See if we can codegen this as an LEA to fold operations together.
1188     if (N.getValueType() == MVT::i32) {
1189       X86AddressMode AM;
1190       if (!SelectAddress(N.getOperand(0), AM) &&
1191           !SelectAddress(N.getOperand(1), AM)) {
1192         // If this is not just an add, emit the LEA.  For a simple add (like
1193         // reg+reg or reg+imm), we just emit an add.  It might be a good idea to
1194         // leave this as LEA, then peephole it to 'ADD' after two address elim
1195         // happens.
1196         if (AM.Scale != 1 || AM.BaseType == X86AddressMode::FrameIndexBase ||
1197             AM.GV || (AM.Base.Reg && AM.IndexReg && AM.Disp)) {
1198           addFullAddress(BuildMI(BB, X86::LEA32r, 4, Result), AM);
1199           return Result;
1200         }
1201       }
1202     }
1203
1204     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1205       Opc = 0;
1206       if (CN->getValue() == 1) {   // add X, 1 -> inc X
1207         switch (N.getValueType()) {
1208         default: assert(0 && "Cannot integer add this type!");
1209         case MVT::i8:  Opc = X86::INC8r; break;
1210         case MVT::i16: Opc = X86::INC16r; break;
1211         case MVT::i32: Opc = X86::INC32r; break;
1212         }
1213       } else if (CN->isAllOnesValue()) { // add X, -1 -> dec X
1214         switch (N.getValueType()) {
1215         default: assert(0 && "Cannot integer add this type!");
1216         case MVT::i8:  Opc = X86::DEC8r; break;
1217         case MVT::i16: Opc = X86::DEC16r; break;
1218         case MVT::i32: Opc = X86::DEC32r; break;
1219         }
1220       }
1221
1222       if (Opc) {
1223         Tmp1 = SelectExpr(N.getOperand(0));
1224         BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
1225         return Result;
1226       }
1227
1228       switch (N.getValueType()) {
1229       default: assert(0 && "Cannot add this type!");
1230       case MVT::i8:  Opc = X86::ADD8ri; break;
1231       case MVT::i16: Opc = X86::ADD16ri; break;
1232       case MVT::i32: Opc = X86::ADD32ri; break;
1233       }
1234       if (Opc) {
1235         Tmp1 = SelectExpr(N.getOperand(0));
1236         BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1237         return Result;
1238       }
1239     }
1240
1241     switch (N.getValueType()) {
1242     default: assert(0 && "Cannot add this type!");
1243     case MVT::i8:  Opc = X86::ADD8rr; break;
1244     case MVT::i16: Opc = X86::ADD16rr; break;
1245     case MVT::i32: Opc = X86::ADD32rr; break;
1246     case MVT::f32:
1247     case MVT::f64: Opc = X86::FpADD; break;
1248     }
1249
1250     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1251       Tmp1 = SelectExpr(N.getOperand(0));
1252       Tmp2 = SelectExpr(N.getOperand(1));
1253     } else {
1254       Tmp2 = SelectExpr(N.getOperand(1));
1255       Tmp1 = SelectExpr(N.getOperand(0));
1256     }
1257
1258     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1259     return Result;
1260   case ISD::SUB:
1261     if (MVT::isInteger(N.getValueType()))
1262       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(0)))
1263         if (CN->isNullValue()) {   // 0 - N -> neg N
1264           switch (N.getValueType()) {
1265           default: assert(0 && "Cannot sub this type!");
1266           case MVT::i1:
1267           case MVT::i8:  Opc = X86::NEG8r;  break;
1268           case MVT::i16: Opc = X86::NEG16r; break;
1269           case MVT::i32: Opc = X86::NEG32r; break;
1270           }
1271           Tmp1 = SelectExpr(N.getOperand(1));
1272           BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
1273           return Result;
1274         }
1275
1276     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1277       switch (N.getValueType()) {
1278       default: assert(0 && "Cannot sub this type!");
1279       case MVT::i1:
1280       case MVT::i8:  Opc = X86::SUB8ri;  break;
1281       case MVT::i16: Opc = X86::SUB16ri; break;
1282       case MVT::i32: Opc = X86::SUB32ri; break;
1283       }
1284       Tmp1 = SelectExpr(N.getOperand(0));
1285       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1286       return Result;
1287     }
1288
1289     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1290       Tmp1 = SelectExpr(N.getOperand(0));
1291       Tmp2 = SelectExpr(N.getOperand(1));
1292     } else {
1293       Tmp2 = SelectExpr(N.getOperand(1));
1294       Tmp1 = SelectExpr(N.getOperand(0));
1295     }
1296
1297     switch (N.getValueType()) {
1298     default: assert(0 && "Cannot add this type!");
1299     case MVT::i1:
1300     case MVT::i8:  Opc = X86::SUB8rr; break;
1301     case MVT::i16: Opc = X86::SUB16rr; break;
1302     case MVT::i32: Opc = X86::SUB32rr; break;
1303     case MVT::f32:
1304     case MVT::f64: Opc = X86::FpSUB; break;
1305     }
1306     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1307     return Result;
1308
1309   case ISD::AND:
1310     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1311       switch (N.getValueType()) {
1312       default: assert(0 && "Cannot add this type!");
1313       case MVT::i1:
1314       case MVT::i8:  Opc = X86::AND8ri;  break;
1315       case MVT::i16: Opc = X86::AND16ri; break;
1316       case MVT::i32: Opc = X86::AND32ri; break;
1317       }
1318       Tmp1 = SelectExpr(N.getOperand(0));
1319       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1320       return Result;
1321     }
1322
1323     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1324       Tmp1 = SelectExpr(N.getOperand(0));
1325       Tmp2 = SelectExpr(N.getOperand(1));
1326     } else {
1327       Tmp2 = SelectExpr(N.getOperand(1));
1328       Tmp1 = SelectExpr(N.getOperand(0));
1329     }
1330
1331     switch (N.getValueType()) {
1332     default: assert(0 && "Cannot add this type!");
1333     case MVT::i1:
1334     case MVT::i8:  Opc = X86::AND8rr; break;
1335     case MVT::i16: Opc = X86::AND16rr; break;
1336     case MVT::i32: Opc = X86::AND32rr; break;
1337     }
1338     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1339     return Result;
1340   case ISD::OR:
1341     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1342       Tmp1 = SelectExpr(N.getOperand(0));
1343       switch (N.getValueType()) {
1344       default: assert(0 && "Cannot add this type!");
1345       case MVT::i1:
1346       case MVT::i8:  Opc = X86::OR8ri;  break;
1347       case MVT::i16: Opc = X86::OR16ri; break;
1348       case MVT::i32: Opc = X86::OR32ri; break;
1349       }
1350       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1351       return Result;
1352     }
1353
1354     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1355       Tmp1 = SelectExpr(N.getOperand(0));
1356       Tmp2 = SelectExpr(N.getOperand(1));
1357     } else {
1358       Tmp2 = SelectExpr(N.getOperand(1));
1359       Tmp1 = SelectExpr(N.getOperand(0));
1360     }
1361
1362     switch (N.getValueType()) {
1363     default: assert(0 && "Cannot add this type!");
1364     case MVT::i1:
1365     case MVT::i8:  Opc = X86::OR8rr; break;
1366     case MVT::i16: Opc = X86::OR16rr; break;
1367     case MVT::i32: Opc = X86::OR32rr; break;
1368     }
1369     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1370     return Result;
1371   case ISD::XOR:
1372     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1373       Tmp1 = SelectExpr(N.getOperand(0));
1374       switch (N.getValueType()) {
1375       default: assert(0 && "Cannot add this type!");
1376       case MVT::i1:
1377       case MVT::i8:  Opc = X86::XOR8ri;  break;
1378       case MVT::i16: Opc = X86::XOR16ri; break;
1379       case MVT::i32: Opc = X86::XOR32ri; break;
1380       }
1381       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1382       return Result;
1383     }
1384
1385     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1386       Tmp1 = SelectExpr(N.getOperand(0));
1387       Tmp2 = SelectExpr(N.getOperand(1));
1388     } else {
1389       Tmp2 = SelectExpr(N.getOperand(1));
1390       Tmp1 = SelectExpr(N.getOperand(0));
1391     }
1392
1393     switch (N.getValueType()) {
1394     default: assert(0 && "Cannot add this type!");
1395     case MVT::i1:
1396     case MVT::i8:  Opc = X86::XOR8rr; break;
1397     case MVT::i16: Opc = X86::XOR16rr; break;
1398     case MVT::i32: Opc = X86::XOR32rr; break;
1399     }
1400     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1401     return Result;
1402
1403   case ISD::MUL:
1404     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1405       Opc = 0;
1406       switch (N.getValueType()) {
1407       default: assert(0 && "Cannot multiply this type!");
1408       case MVT::i8:  break;
1409       case MVT::i16: Opc = X86::IMUL16rri; break;
1410       case MVT::i32: Opc = X86::IMUL32rri; break;
1411       }
1412       if (Opc) {
1413         Tmp1 = SelectExpr(N.getOperand(0));
1414         BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1415         return Result;
1416       }
1417     }
1418
1419     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1420       Tmp1 = SelectExpr(N.getOperand(0));
1421       Tmp2 = SelectExpr(N.getOperand(1));
1422     } else {
1423       Tmp2 = SelectExpr(N.getOperand(1));
1424       Tmp1 = SelectExpr(N.getOperand(0));
1425     }
1426     switch (N.getValueType()) {
1427     default: assert(0 && "Cannot add this type!");
1428     case MVT::i8:
1429       // Must use the MUL instruction, which forces use of AL.
1430       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(Tmp1);
1431       BuildMI(BB, X86::MUL8r, 1).addReg(Tmp2);
1432       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
1433       return Result;
1434     case MVT::i16: Opc = X86::IMUL16rr; break;
1435     case MVT::i32: Opc = X86::IMUL32rr; break;
1436     case MVT::f32:
1437     case MVT::f64: Opc = X86::FpMUL; break;
1438     }
1439     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1440     return Result;
1441
1442   case ISD::SELECT:
1443     if (N.getValueType() != MVT::i1 && N.getValueType() != MVT::i8) {
1444       if (getRegPressure(N.getOperand(1)) > getRegPressure(N.getOperand(2))) {
1445         Tmp2 = SelectExpr(N.getOperand(1));
1446         Tmp3 = SelectExpr(N.getOperand(2));
1447       } else {
1448         Tmp3 = SelectExpr(N.getOperand(2));
1449         Tmp2 = SelectExpr(N.getOperand(1));
1450       }
1451       EmitSelectCC(N.getOperand(0), N.getValueType(), Tmp2, Tmp3, Result);
1452       return Result;
1453     } else {
1454       // FIXME: This should not be implemented here, it should be in the generic
1455       // code!
1456       if (getRegPressure(N.getOperand(1)) > getRegPressure(N.getOperand(2))) {
1457         Tmp2 = SelectExpr(CurDAG->getNode(ISD::ZERO_EXTEND, MVT::i16,
1458                                           N.getOperand(1)));
1459         Tmp3 = SelectExpr(CurDAG->getNode(ISD::ZERO_EXTEND, MVT::i16,
1460                                           N.getOperand(2)));
1461       } else {
1462         Tmp3 = SelectExpr(CurDAG->getNode(ISD::ZERO_EXTEND, MVT::i16,
1463                                           N.getOperand(2)));
1464         Tmp2 = SelectExpr(CurDAG->getNode(ISD::ZERO_EXTEND, MVT::i16,
1465                                           N.getOperand(1)));
1466       }
1467       unsigned TmpReg = MakeReg(MVT::i16);
1468       EmitSelectCC(N.getOperand(0), MVT::i16, Tmp2, Tmp3, TmpReg);
1469       // FIXME: need subregs to do better than this!
1470       BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(TmpReg);
1471       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
1472       return Result;
1473     }
1474
1475   case ISD::SDIV:
1476   case ISD::UDIV:
1477   case ISD::SREM:
1478   case ISD::UREM: {
1479     if (N.getOpcode() == ISD::SDIV)
1480       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1481         // FIXME: These special cases should be handled by the lowering impl!
1482         unsigned RHS = CN->getValue();
1483         bool isNeg = false;
1484         if ((int)RHS < 0) {
1485           isNeg = true;
1486           RHS = -RHS;
1487         }
1488         if (RHS && (RHS & (RHS-1)) == 0) {   // Signed division by power of 2?
1489           unsigned Log = log2(RHS);
1490           unsigned TmpReg = MakeReg(N.getValueType());
1491           unsigned SAROpc, SHROpc, ADDOpc, NEGOpc;
1492           switch (N.getValueType()) {
1493           default: assert("Unknown type to signed divide!");
1494           case MVT::i8:
1495             SAROpc = X86::SAR8ri;
1496             SHROpc = X86::SHR8ri;
1497             ADDOpc = X86::ADD8rr;
1498             NEGOpc = X86::NEG8r;
1499             break;
1500           case MVT::i16:
1501             SAROpc = X86::SAR16ri;
1502             SHROpc = X86::SHR16ri;
1503             ADDOpc = X86::ADD16rr;
1504             NEGOpc = X86::NEG16r;
1505             break;
1506           case MVT::i32:
1507             SAROpc = X86::SAR32ri;
1508             SHROpc = X86::SHR32ri;
1509             ADDOpc = X86::ADD32rr;
1510             NEGOpc = X86::NEG32r;
1511             break;
1512           }
1513           Tmp1 = SelectExpr(N.getOperand(0));
1514           BuildMI(BB, SAROpc, 2, TmpReg).addReg(Tmp1).addImm(Log-1);
1515           unsigned TmpReg2 = MakeReg(N.getValueType());
1516           BuildMI(BB, SHROpc, 2, TmpReg2).addReg(TmpReg).addImm(32-Log);
1517           unsigned TmpReg3 = MakeReg(N.getValueType());
1518           BuildMI(BB, ADDOpc, 2, TmpReg3).addReg(Tmp1).addReg(TmpReg2);
1519
1520           unsigned TmpReg4 = isNeg ? MakeReg(N.getValueType()) : Result;
1521           BuildMI(BB, SAROpc, 2, TmpReg4).addReg(TmpReg3).addImm(Log);
1522           if (isNeg)
1523             BuildMI(BB, NEGOpc, 1, Result).addReg(TmpReg4);
1524           return Result;
1525         }
1526       }
1527
1528     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1529       Tmp1 = SelectExpr(N.getOperand(0));
1530       Tmp2 = SelectExpr(N.getOperand(1));
1531     } else {
1532       Tmp2 = SelectExpr(N.getOperand(1));
1533       Tmp1 = SelectExpr(N.getOperand(0));
1534     }
1535
1536     bool isSigned = N.getOpcode() == ISD::SDIV || N.getOpcode() == ISD::SREM;
1537     bool isDiv    = N.getOpcode() == ISD::SDIV || N.getOpcode() == ISD::UDIV;
1538     unsigned LoReg, HiReg, DivOpcode, MovOpcode, ClrOpcode, SExtOpcode;
1539     switch (N.getValueType()) {
1540     default: assert(0 && "Cannot sdiv this type!");
1541     case MVT::i8:
1542       DivOpcode = isSigned ? X86::IDIV8r : X86::DIV8r;
1543       LoReg = X86::AL;
1544       HiReg = X86::AH;
1545       MovOpcode = X86::MOV8rr;
1546       ClrOpcode = X86::MOV8ri;
1547       SExtOpcode = X86::CBW;
1548       break;
1549     case MVT::i16:
1550       DivOpcode = isSigned ? X86::IDIV16r : X86::DIV16r;
1551       LoReg = X86::AX;
1552       HiReg = X86::DX;
1553       MovOpcode = X86::MOV16rr;
1554       ClrOpcode = X86::MOV16ri;
1555       SExtOpcode = X86::CWD;
1556       break;
1557     case MVT::i32:
1558       DivOpcode = isSigned ? X86::IDIV32r : X86::DIV32r;
1559       LoReg =X86::EAX;
1560       HiReg = X86::EDX;
1561       MovOpcode = X86::MOV32rr;
1562       ClrOpcode = X86::MOV32ri;
1563       SExtOpcode = X86::CDQ;
1564       break;
1565     case MVT::i64: assert(0 && "FIXME: implement i64 DIV/REM libcalls!");
1566     case MVT::f32:
1567     case MVT::f64:
1568       if (N.getOpcode() == ISD::SDIV)
1569         BuildMI(BB, X86::FpDIV, 2, Result).addReg(Tmp1).addReg(Tmp2);
1570       else
1571         assert(0 && "FIXME: Emit frem libcall to fmod!");
1572       return Result;
1573     }
1574
1575     // Set up the low part.
1576     BuildMI(BB, MovOpcode, 1, LoReg).addReg(Tmp1);
1577
1578     if (isSigned) {
1579       // Sign extend the low part into the high part.
1580       BuildMI(BB, SExtOpcode, 0);
1581     } else {
1582       // Zero out the high part, effectively zero extending the input.
1583       BuildMI(BB, ClrOpcode, 1, HiReg).addImm(0);
1584     }
1585
1586     // Emit the DIV/IDIV instruction.
1587     BuildMI(BB, DivOpcode, 1).addReg(Tmp2);
1588
1589     // Get the result of the divide or rem.
1590     BuildMI(BB, MovOpcode, 1, Result).addReg(isDiv ? LoReg : HiReg);
1591     return Result;
1592   }
1593
1594   case ISD::SHL:
1595     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1596       switch (N.getValueType()) {
1597       default: assert(0 && "Cannot shift this type!");
1598       case MVT::i8:  Opc = X86::SHL8ri; break;
1599       case MVT::i16: Opc = X86::SHL16ri; break;
1600       case MVT::i32: Opc = X86::SHL32ri; break;
1601       }
1602       Tmp1 = SelectExpr(N.getOperand(0));
1603       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1604       return Result;
1605     }
1606
1607     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1608       Tmp1 = SelectExpr(N.getOperand(0));
1609       Tmp2 = SelectExpr(N.getOperand(1));
1610     } else {
1611       Tmp2 = SelectExpr(N.getOperand(1));
1612       Tmp1 = SelectExpr(N.getOperand(0));
1613     }
1614
1615     switch (N.getValueType()) {
1616     default: assert(0 && "Cannot shift this type!");
1617     case MVT::i8 : Opc = X86::SHL8rCL; break;
1618     case MVT::i16: Opc = X86::SHL16rCL; break;
1619     case MVT::i32: Opc = X86::SHL32rCL; break;
1620     }
1621     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(Tmp2);
1622     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1623     return Result;
1624   case ISD::SRL:
1625     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1626       switch (N.getValueType()) {
1627       default: assert(0 && "Cannot shift this type!");
1628       case MVT::i8:  Opc = X86::SHR8ri; break;
1629       case MVT::i16: Opc = X86::SHR16ri; break;
1630       case MVT::i32: Opc = X86::SHR32ri; break;
1631       }
1632       Tmp1 = SelectExpr(N.getOperand(0));
1633       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1634       return Result;
1635     }
1636
1637     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1638       Tmp1 = SelectExpr(N.getOperand(0));
1639       Tmp2 = SelectExpr(N.getOperand(1));
1640     } else {
1641       Tmp2 = SelectExpr(N.getOperand(1));
1642       Tmp1 = SelectExpr(N.getOperand(0));
1643     }
1644
1645     switch (N.getValueType()) {
1646     default: assert(0 && "Cannot shift this type!");
1647     case MVT::i8 : Opc = X86::SHR8rCL; break;
1648     case MVT::i16: Opc = X86::SHR16rCL; break;
1649     case MVT::i32: Opc = X86::SHR32rCL; break;
1650     }
1651     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(Tmp2);
1652     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1653     return Result;
1654   case ISD::SRA:
1655     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1656       switch (N.getValueType()) {
1657       default: assert(0 && "Cannot shift this type!");
1658       case MVT::i8:  Opc = X86::SAR8ri; break;
1659       case MVT::i16: Opc = X86::SAR16ri; break;
1660       case MVT::i32: Opc = X86::SAR32ri; break;
1661       }
1662       Tmp1 = SelectExpr(N.getOperand(0));
1663       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
1664       return Result;
1665     }
1666
1667     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1668       Tmp1 = SelectExpr(N.getOperand(0));
1669       Tmp2 = SelectExpr(N.getOperand(1));
1670     } else {
1671       Tmp2 = SelectExpr(N.getOperand(1));
1672       Tmp1 = SelectExpr(N.getOperand(0));
1673     }
1674
1675     switch (N.getValueType()) {
1676     default: assert(0 && "Cannot shift this type!");
1677     case MVT::i8 : Opc = X86::SAR8rCL; break;
1678     case MVT::i16: Opc = X86::SAR16rCL; break;
1679     case MVT::i32: Opc = X86::SAR32rCL; break;
1680     }
1681     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(Tmp2);
1682     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
1683     return Result;
1684
1685   case ISD::SETCC:
1686     EmitCMP(N.getOperand(0), N.getOperand(1));
1687     EmitSetCC(BB, Result, cast<SetCCSDNode>(N)->getCondition(),
1688               MVT::isFloatingPoint(N.getOperand(1).getValueType()));
1689     return Result;
1690   case ISD::LOAD: {
1691     // The chain for this load is now lowered.
1692     LoweredTokens.insert(SDOperand(Node, 1));
1693
1694     // Make sure we generate both values.
1695     if (Result != 1)
1696       ExprMap[N.getValue(1)] = 1;   // Generate the token
1697     else
1698       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
1699
1700     switch (Node->getValueType(0)) {
1701     default: assert(0 && "Cannot load this type!");
1702     case MVT::i1:
1703     case MVT::i8:  Opc = X86::MOV8rm; break;
1704     case MVT::i16: Opc = X86::MOV16rm; break;
1705     case MVT::i32: Opc = X86::MOV32rm; break;
1706     case MVT::f32: Opc = X86::FLD32m; ContainsFPCode = true; break;
1707     case MVT::f64: Opc = X86::FLD64m; ContainsFPCode = true; break;
1708     }
1709
1710     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N.getOperand(1))){
1711       Select(N.getOperand(0));
1712       addConstantPoolReference(BuildMI(BB, Opc, 4, Result), CP->getIndex());
1713     } else {
1714       X86AddressMode AM;
1715       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1716         Select(N.getOperand(0));
1717         SelectAddress(N.getOperand(1), AM);
1718       } else {
1719         SelectAddress(N.getOperand(1), AM);
1720         Select(N.getOperand(0));
1721       }
1722       addFullAddress(BuildMI(BB, Opc, 4, Result), AM);
1723     }
1724     return Result;
1725   }
1726   case ISD::DYNAMIC_STACKALLOC:
1727     // Generate both result values.
1728     if (Result != 1)
1729       ExprMap[N.getValue(1)] = 1;   // Generate the token
1730     else
1731       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
1732
1733     // FIXME: We are currently ignoring the requested alignment for handling
1734     // greater than the stack alignment.  This will need to be revisited at some
1735     // point.  Align = N.getOperand(2);
1736
1737     if (!isa<ConstantSDNode>(N.getOperand(2)) ||
1738         cast<ConstantSDNode>(N.getOperand(2))->getValue() != 0) {
1739       std::cerr << "Cannot allocate stack object with greater alignment than"
1740                 << " the stack alignment yet!";
1741       abort();
1742     }
1743
1744     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1745       Select(N.getOperand(0));
1746       BuildMI(BB, X86::SUB32ri, 2, X86::ESP).addReg(X86::ESP)
1747         .addImm(CN->getValue());
1748     } else {
1749       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1750         Select(N.getOperand(0));
1751         Tmp1 = SelectExpr(N.getOperand(1));
1752       } else {
1753         Tmp1 = SelectExpr(N.getOperand(1));
1754         Select(N.getOperand(0));
1755       }
1756
1757       // Subtract size from stack pointer, thereby allocating some space.
1758       BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(Tmp1);
1759     }
1760
1761     // Put a pointer to the space into the result register, by copying the stack
1762     // pointer.
1763     BuildMI(BB, X86::MOV32rr, 1, Result).addReg(X86::ESP);
1764     return Result;
1765
1766   case ISD::CALL:
1767     // The chain for this call is now lowered.
1768     LoweredTokens.insert(N.getValue(Node->getNumValues()-1));
1769
1770     if (GlobalAddressSDNode *GASD =
1771                dyn_cast<GlobalAddressSDNode>(N.getOperand(1))) {
1772       Select(N.getOperand(0));
1773       BuildMI(BB, X86::CALLpcrel32, 1).addGlobalAddress(GASD->getGlobal(),true);
1774     } else if (ExternalSymbolSDNode *ESSDN =
1775                dyn_cast<ExternalSymbolSDNode>(N.getOperand(1))) {
1776       Select(N.getOperand(0));
1777       BuildMI(BB, X86::CALLpcrel32,
1778               1).addExternalSymbol(ESSDN->getSymbol(), true);
1779     } else {
1780       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1781         Select(N.getOperand(0));
1782         Tmp1 = SelectExpr(N.getOperand(1));
1783       } else {
1784         Tmp1 = SelectExpr(N.getOperand(1));
1785         Select(N.getOperand(0));
1786       }
1787
1788       BuildMI(BB, X86::CALL32r, 1).addReg(Tmp1);
1789     }
1790     switch (Node->getValueType(0)) {
1791     default: assert(0 && "Unknown value type for call result!");
1792     case MVT::Other: return 1;
1793     case MVT::i1:
1794     case MVT::i8:
1795       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
1796       break;
1797     case MVT::i16:
1798       BuildMI(BB, X86::MOV16rr, 1, Result).addReg(X86::AX);
1799       break;
1800     case MVT::i32:
1801       BuildMI(BB, X86::MOV32rr, 1, Result).addReg(X86::EAX);
1802       if (Node->getValueType(1) == MVT::i32)
1803         BuildMI(BB, X86::MOV32rr, 1, Result+1).addReg(X86::EDX);
1804       break;
1805     case MVT::f32:
1806     case MVT::f64:     // Floating-point return values live in %ST(0)
1807       ContainsFPCode = true;
1808       BuildMI(BB, X86::FpGETRESULT, 1, Result);
1809       break;
1810     }
1811     return Result+N.ResNo;
1812   }
1813
1814   return 0;
1815 }
1816
1817 void ISel::Select(SDOperand N) {
1818   unsigned Tmp1, Tmp2, Opc;
1819
1820   // FIXME: Disable for our current expansion model!
1821   if (/*!N->hasOneUse() &&*/ !LoweredTokens.insert(N).second)
1822     return;  // Already selected.
1823
1824   switch (N.getOpcode()) {
1825   default:
1826     N.Val->dump(); std::cerr << "\n";
1827     assert(0 && "Node not handled yet!");
1828   case ISD::EntryToken: return;  // Noop
1829   case ISD::CopyToReg:
1830     Select(N.getOperand(0));
1831     Tmp1 = SelectExpr(N.getOperand(1));
1832     Tmp2 = cast<CopyRegSDNode>(N)->getReg();
1833
1834     if (Tmp1 != Tmp2) {
1835       switch (N.getOperand(1).getValueType()) {
1836       default: assert(0 && "Invalid type for operation!");
1837       case MVT::i1:
1838       case MVT::i8:  Opc = X86::MOV8rr; break;
1839       case MVT::i16: Opc = X86::MOV16rr; break;
1840       case MVT::i32: Opc = X86::MOV32rr; break;
1841       case MVT::f32:
1842       case MVT::f64: Opc = X86::FpMOV; ContainsFPCode = true; break;
1843       }
1844       BuildMI(BB, Opc, 1, Tmp2).addReg(Tmp1);
1845     }
1846     return;
1847   case ISD::RET:
1848     switch (N.getNumOperands()) {
1849     default:
1850       assert(0 && "Unknown return instruction!");
1851     case 3:
1852       assert(N.getOperand(1).getValueType() == MVT::i32 &&
1853              N.getOperand(2).getValueType() == MVT::i32 &&
1854              "Unknown two-register value!");
1855       if (getRegPressure(N.getOperand(1)) > getRegPressure(N.getOperand(2))) {
1856         Tmp1 = SelectExpr(N.getOperand(1));
1857         Tmp2 = SelectExpr(N.getOperand(2));
1858       } else {
1859         Tmp2 = SelectExpr(N.getOperand(2));
1860         Tmp1 = SelectExpr(N.getOperand(1));
1861       }
1862       Select(N.getOperand(0));
1863
1864       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Tmp1);
1865       BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(Tmp2);
1866       // Declare that EAX & EDX are live on exit.
1867       BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX)
1868         .addReg(X86::ESP);
1869       break;
1870     case 2:
1871       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
1872         Select(N.getOperand(0));
1873         Tmp1 = SelectExpr(N.getOperand(1));
1874       } else {
1875         Tmp1 = SelectExpr(N.getOperand(1));
1876         Select(N.getOperand(0));
1877       }
1878       switch (N.getOperand(1).getValueType()) {
1879       default: assert(0 && "All other types should have been promoted!!");
1880       case MVT::f64:
1881         BuildMI(BB, X86::FpSETRESULT, 1).addReg(Tmp1);
1882         // Declare that top-of-stack is live on exit
1883         BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP);
1884         break;
1885       case MVT::i32:
1886         BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Tmp1);
1887         BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP);
1888         break;
1889       }
1890       break;
1891     case 1:
1892       Select(N.getOperand(0));
1893       break;
1894     }
1895     BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
1896     return;
1897   case ISD::BR: {
1898     Select(N.getOperand(0));
1899     MachineBasicBlock *Dest =
1900       cast<BasicBlockSDNode>(N.getOperand(1))->getBasicBlock();
1901     BuildMI(BB, X86::JMP, 1).addMBB(Dest);
1902     return;
1903   }
1904
1905   case ISD::BRCOND: {
1906     MachineBasicBlock *Dest =
1907       cast<BasicBlockSDNode>(N.getOperand(2))->getBasicBlock();
1908
1909     bool ChainFirst =
1910       getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1));
1911
1912     if (ChainFirst) Select(N.getOperand(0));
1913
1914     // Try to fold a setcc into the branch.  If this fails, emit a test/jne
1915     // pair.
1916     if (EmitBranchCC(Dest, N.getOperand(1))) {
1917       Tmp1 = SelectExpr(N.getOperand(1));
1918       BuildMI(BB, X86::TEST8rr, 2).addReg(Tmp1).addReg(Tmp1);
1919       BuildMI(BB, X86::JNE, 1).addMBB(Dest);
1920     }
1921
1922     if (!ChainFirst) Select(N.getOperand(0));
1923
1924     return;
1925   }
1926   case ISD::LOAD:
1927   case ISD::CALL:
1928   case ISD::DYNAMIC_STACKALLOC:
1929     SelectExpr(N);
1930     return;
1931   case ISD::STORE: {
1932     // Select the address.
1933     X86AddressMode AM;
1934
1935     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1936       Opc = 0;
1937       switch (CN->getValueType(0)) {
1938       default: assert(0 && "Invalid type for operation!");
1939       case MVT::i1:
1940       case MVT::i8:  Opc = X86::MOV8mi; break;
1941       case MVT::i16: Opc = X86::MOV16mi; break;
1942       case MVT::i32: Opc = X86::MOV32mi; break;
1943       case MVT::f32:
1944       case MVT::f64: break;
1945       }
1946       if (Opc) {
1947         if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(2))) {
1948           Select(N.getOperand(0));
1949           SelectAddress(N.getOperand(2), AM);
1950         } else {
1951           SelectAddress(N.getOperand(2), AM);
1952           Select(N.getOperand(0));
1953         }
1954         addFullAddress(BuildMI(BB, Opc, 4+1), AM).addImm(CN->getValue());
1955         return;
1956       }
1957     }
1958     switch (N.getOperand(1).getValueType()) {
1959     default: assert(0 && "Cannot store this type!");
1960     case MVT::i1:
1961     case MVT::i8:  Opc = X86::MOV8mr; break;
1962     case MVT::i16: Opc = X86::MOV16mr; break;
1963     case MVT::i32: Opc = X86::MOV32mr; break;
1964     case MVT::f32: Opc = X86::FST32m; break;
1965     case MVT::f64: Opc = X86::FST64m; break;
1966     }
1967
1968     std::vector<std::pair<unsigned, unsigned> > RP;
1969     RP.push_back(std::make_pair(getRegPressure(N.getOperand(0)), 0));
1970     RP.push_back(std::make_pair(getRegPressure(N.getOperand(1)), 1));
1971     RP.push_back(std::make_pair(getRegPressure(N.getOperand(2)), 2));
1972     std::sort(RP.begin(), RP.end());
1973
1974     for (unsigned i = 0; i != 3; ++i)
1975       switch (RP[2-i].second) {
1976       default: assert(0 && "Unknown operand number!");
1977       case 0: Select(N.getOperand(0)); break;
1978       case 1: Tmp1 = SelectExpr(N.getOperand(1)); break;
1979       case 2: SelectAddress(N.getOperand(2), AM); break;
1980       }
1981
1982     addFullAddress(BuildMI(BB, Opc, 4+1), AM).addReg(Tmp1);
1983     return;
1984   }
1985   case ISD::ADJCALLSTACKDOWN:
1986   case ISD::ADJCALLSTACKUP:
1987     Select(N.getOperand(0));
1988     Tmp1 = cast<ConstantSDNode>(N.getOperand(1))->getValue();
1989
1990     Opc = N.getOpcode() == ISD::ADJCALLSTACKDOWN ? X86::ADJCALLSTACKDOWN :
1991                                                    X86::ADJCALLSTACKUP;
1992     BuildMI(BB, Opc, 1).addImm(Tmp1);
1993     return;
1994   }
1995   assert(0 && "Should not be reached!");
1996 }
1997
1998
1999 /// createX86PatternInstructionSelector - This pass converts an LLVM function
2000 /// into a machine code representation using pattern matching and a machine
2001 /// description file.
2002 ///
2003 FunctionPass *llvm::createX86PatternInstructionSelector(TargetMachine &TM) {
2004   return new ISel(TM);
2005 }