lib/Target/X86/X86ISelPattern.cpp

   1 //===-- X86ISelPattern.cpp - A pattern matching inst selector for X86 -----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by the LLVM research group and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a pattern matching instruction selector for X86.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "X86.h"
  15 #include "X86InstrBuilder.h"
  16 #include "X86RegisterInfo.h"
  17 #include "X86Subtarget.h"
  18 #include "llvm/CallingConv.h"
  19 #include "llvm/Constants.h"
  20 #include "llvm/Instructions.h"
  21 #include "llvm/Function.h"
  22 #include "llvm/CodeGen/MachineConstantPool.h"
  23 #include "llvm/CodeGen/MachineFunction.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/SelectionDAG.h"
  26 #include "llvm/CodeGen/SelectionDAGISel.h"
  27 #include "llvm/CodeGen/SSARegMap.h"
  28 #include "llvm/Target/TargetData.h"
  29 #include "llvm/Target/TargetLowering.h"
  30 #include "llvm/Target/TargetMachine.h"
  31 #include "llvm/Target/TargetOptions.h"
  32 #include "llvm/Support/CFG.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/ADT/Statistic.h"
  35 #include <set>
  36 #include <algorithm>
  37 using namespace llvm;
  38
  39 // FIXME: temporary.
  40 #include "llvm/Support/CommandLine.h"
  41 static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
  42                                   cl::desc("Enable fastcc on X86"));
  43
  44 namespace {
  45   // X86 Specific DAG Nodes
  46   namespace X86ISD {
  47     enum NodeType {
  48       // Start the numbering where the builtin ops leave off.
  49       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  50
  51       /// FILD64m - This instruction implements SINT_TO_FP with a
  52       /// 64-bit source in memory and a FP reg result.  This corresponds to
  53       /// the X86::FILD64m instruction.  It has two inputs (token chain and
  54       /// address) and two outputs (FP value and token chain).
  55       FILD64m,
  56
  57       /// CALL/TAILCALL - These operations represent an abstract X86 call
  58       /// instruction, which includes a bunch of information.  In particular the
  59       /// operands of these node are:
  60       ///
  61       ///     #0 - The incoming token chain
  62       ///     #1 - The callee
  63       ///     #2 - The number of arg bytes the caller pushes on the stack.
  64       ///     #3 - The number of arg bytes the callee pops off the stack.
  65       ///     #4 - The value to pass in AL/AX/EAX (optional)
  66       ///     #5 - The value to pass in DL/DX/EDX (optional)
  67       ///
  68       /// The result values of these nodes are:
  69       ///
  70       ///     #0 - The outgoing token chain
  71       ///     #1 - The first register result value (optional)
  72       ///     #2 - The second register result value (optional)
  73       ///
  74       /// The CALL vs TAILCALL distinction boils down to whether the callee is
  75       /// known not to modify the caller's stack frame, as is standard with
  76       /// LLVM.
  77       CALL,
  78       TAILCALL,
  79     };
  80   }
  81 }
  82
  83 //===----------------------------------------------------------------------===//
  84 //  X86TargetLowering - X86 Implementation of the TargetLowering interface
  85 namespace {
  86   class X86TargetLowering : public TargetLowering {
  87     int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
  88     int ReturnAddrIndex;              // FrameIndex for return slot.
  89     int BytesToPopOnReturn;           // Number of arg bytes ret should pop.
  90     int BytesCallerReserves;          // Number of arg bytes caller makes.
  91   public:
  92     X86TargetLowering(TargetMachine &TM) : TargetLowering(TM) {
  93       // Set up the TargetLowering object.
  94
  95       // X86 is weird, it always uses i8 for shift amounts and setcc results.
  96       setShiftAmountType(MVT::i8);
  97       setSetCCResultType(MVT::i8);
  98       setSetCCResultContents(ZeroOrOneSetCCResult);
  99       setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
 100
 101       // Set up the register classes.
 102       // FIXME: Eliminate these two classes when legalize can handle promotions
 103       // well.
 104       addRegisterClass(MVT::i1, X86::R8RegisterClass);
 105       addRegisterClass(MVT::i8, X86::R8RegisterClass);
 106       addRegisterClass(MVT::i16, X86::R16RegisterClass);
 107       addRegisterClass(MVT::i32, X86::R32RegisterClass);
 108
 109       setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
 110       setOperationAction(ISD::BRCONDTWOWAY     , MVT::Other, Expand);
 111       setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
 112       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
 113       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
 114       setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
 115       setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
 116       setOperationAction(ISD::SREM             , MVT::f64  , Expand);
 117       setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
 118       setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
 119       setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
 120       setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
 121       setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
 122       setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
 123       setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
 124       setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
 125       setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
 126
 127       setOperationAction(ISD::READIO           , MVT::i1   , Expand);
 128       setOperationAction(ISD::READIO           , MVT::i8   , Expand);
 129       setOperationAction(ISD::READIO           , MVT::i16  , Expand);
 130       setOperationAction(ISD::READIO           , MVT::i32  , Expand);
 131       setOperationAction(ISD::WRITEIO          , MVT::i1   , Expand);
 132       setOperationAction(ISD::WRITEIO          , MVT::i8   , Expand);
 133       setOperationAction(ISD::WRITEIO          , MVT::i16  , Expand);
 134       setOperationAction(ISD::WRITEIO          , MVT::i32  , Expand);
 135
 136       // These should be promoted to a larger select which is supported.
 137       setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
 138       setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
 139
 140       if (X86ScalarSSE) {
 141         // Set up the FP register classes.
 142         addRegisterClass(MVT::f32, X86::RXMMRegisterClass);
 143         addRegisterClass(MVT::f64, X86::RXMMRegisterClass);
 144
 145         setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
 146         setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);
 147
 148         // We don't support sin/cos/sqrt/fmod
 149         setOperationAction(ISD::FSIN , MVT::f64, Expand);
 150         setOperationAction(ISD::FCOS , MVT::f64, Expand);
 151         setOperationAction(ISD::FABS , MVT::f64, Expand);
 152         setOperationAction(ISD::FNEG , MVT::f64, Expand);
 153         setOperationAction(ISD::SREM , MVT::f64, Expand);
 154         setOperationAction(ISD::FSIN , MVT::f32, Expand);
 155         setOperationAction(ISD::FCOS , MVT::f32, Expand);
 156         setOperationAction(ISD::FABS , MVT::f32, Expand);
 157         setOperationAction(ISD::FNEG , MVT::f32, Expand);
 158         setOperationAction(ISD::SREM , MVT::f32, Expand);
 159       } else {
 160         // Set up the FP register classes.
 161         addRegisterClass(MVT::f64, X86::RFPRegisterClass);
 162
 163         if (!UnsafeFPMath) {
 164           setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
 165           setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
 166         }
 167
 168         addLegalFPImmediate(+0.0); // FLD0
 169         addLegalFPImmediate(+1.0); // FLD1
 170         addLegalFPImmediate(-0.0); // FLD0/FCHS
 171         addLegalFPImmediate(-1.0); // FLD1/FCHS
 172       }
 173       computeRegisterProperties();
 174     }
 175
 176     // Return the number of bytes that a function should pop when it returns (in
 177     // addition to the space used by the return address).
 178     //
 179     unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
 180
 181     // Return the number of bytes that the caller reserves for arguments passed
 182     // to this function.
 183     unsigned getBytesCallerReserves() const { return BytesCallerReserves; }
 184
 185     /// LowerOperation - Provide custom lowering hooks for some operations.
 186     ///
 187     virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
 188
 189     /// LowerArguments - This hook must be implemented to indicate how we should
 190     /// lower the arguments for the specified function, into the specified DAG.
 191     virtual std::vector<SDOperand>
 192     LowerArguments(Function &F, SelectionDAG &DAG);
 193
 194     /// LowerCallTo - This hook lowers an abstract call to a function into an
 195     /// actual call.
 196     virtual std::pair<SDOperand, SDOperand>
 197     LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, unsigned CC,
 198                 bool isTailCall, SDOperand Callee, ArgListTy &Args,
 199                 SelectionDAG &DAG);
 200
 201     virtual SDOperand LowerVAStart(SDOperand Chain, SDOperand VAListP,
 202                                    Value *VAListV, SelectionDAG &DAG);
 203     virtual std::pair<SDOperand,SDOperand>
 204       LowerVAArg(SDOperand Chain, SDOperand VAListP, Value *VAListV,
 205                  const Type *ArgTy, SelectionDAG &DAG);
 206
 207     virtual std::pair<SDOperand, SDOperand>
 208     LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
 209                             SelectionDAG &DAG);
 210
 211     SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG);
 212
 213   private:
 214     // C Calling Convention implementation.
 215     std::vector<SDOperand> LowerCCCArguments(Function &F, SelectionDAG &DAG);
 216     std::pair<SDOperand, SDOperand>
 217     LowerCCCCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg,
 218                    bool isTailCall,
 219                    SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
 220
 221     // Fast Calling Convention implementation.
 222     std::vector<SDOperand> LowerFastCCArguments(Function &F, SelectionDAG &DAG);
 223     std::pair<SDOperand, SDOperand>
 224     LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, bool isTailCall,
 225                       SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
 226   };
 227 }
 228
 229 std::vector<SDOperand>
 230 X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
 231   if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
 232     return LowerFastCCArguments(F, DAG);
 233   return LowerCCCArguments(F, DAG);
 234 }
 235
 236 std::pair<SDOperand, SDOperand>
 237 X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
 238                                bool isVarArg, unsigned CallingConv,
 239                                bool isTailCall,
 240                                SDOperand Callee, ArgListTy &Args,
 241                                SelectionDAG &DAG) {
 242   assert((!isVarArg || CallingConv == CallingConv::C) &&
 243          "Only C takes varargs!");
 244   if (CallingConv == CallingConv::Fast && EnableFastCC)
 245     return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
 246   return  LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
 247 }
 248
 249 //===----------------------------------------------------------------------===//
 250 //                    C Calling Convention implementation
 251 //===----------------------------------------------------------------------===//
 252
 253 std::vector<SDOperand>
 254 X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
 255   std::vector<SDOperand> ArgValues;
 256
 257   MachineFunction &MF = DAG.getMachineFunction();
 258   MachineFrameInfo *MFI = MF.getFrameInfo();
 259
 260   // Add DAG nodes to load the arguments...  On entry to a function on the X86,
 261   // the stack frame looks like this:
 262   //
 263   // [ESP] -- return address
 264   // [ESP + 4] -- first argument (leftmost lexically)
 265   // [ESP + 8] -- second argument, if first argument is four bytes in size
 266   //    ...
 267   //
 268   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
 269   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
 270     MVT::ValueType ObjectVT = getValueType(I->getType());
 271     unsigned ArgIncrement = 4;
 272     unsigned ObjSize;
 273     switch (ObjectVT) {
 274     default: assert(0 && "Unhandled argument type!");
 275     case MVT::i1:
 276     case MVT::i8:  ObjSize = 1;                break;
 277     case MVT::i16: ObjSize = 2;                break;
 278     case MVT::i32: ObjSize = 4;                break;
 279     case MVT::i64: ObjSize = ArgIncrement = 8; break;
 280     case MVT::f32: ObjSize = 4;                break;
 281     case MVT::f64: ObjSize = ArgIncrement = 8; break;
 282     }
 283     // Create the frame index object for this incoming parameter...
 284     int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
 285
 286     // Create the SelectionDAG nodes corresponding to a load from this parameter
 287     SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
 288
 289     // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
 290     // dead loads.
 291     SDOperand ArgValue;
 292     if (!I->use_empty())
 293       ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
 294                              DAG.getSrcValue(NULL));
 295     else {
 296       if (MVT::isInteger(ObjectVT))
 297         ArgValue = DAG.getConstant(0, ObjectVT);
 298       else
 299         ArgValue = DAG.getConstantFP(0, ObjectVT);
 300     }
 301     ArgValues.push_back(ArgValue);
 302
 303     ArgOffset += ArgIncrement;   // Move on to the next argument...
 304   }
 305
 306   // If the function takes variable number of arguments, make a frame index for
 307   // the start of the first vararg value... for expansion of llvm.va_start.
 308   if (F.isVarArg())
 309     VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
 310   ReturnAddrIndex = 0;     // No return address slot generated yet.
 311   BytesToPopOnReturn = 0;  // Callee pops nothing.
 312   BytesCallerReserves = ArgOffset;
 313
 314   // Finally, inform the code generator which regs we return values in.
 315   switch (getValueType(F.getReturnType())) {
 316   default: assert(0 && "Unknown type!");
 317   case MVT::isVoid: break;
 318   case MVT::i1:
 319   case MVT::i8:
 320   case MVT::i16:
 321   case MVT::i32:
 322     MF.addLiveOut(X86::EAX);
 323     break;
 324   case MVT::i64:
 325     MF.addLiveOut(X86::EAX);
 326     MF.addLiveOut(X86::EDX);
 327     break;
 328   case MVT::f32:
 329   case MVT::f64:
 330     MF.addLiveOut(X86::ST0);
 331     break;
 332   }
 333   return ArgValues;
 334 }
 335
 336 std::pair<SDOperand, SDOperand>
 337 X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
 338                                   bool isVarArg, bool isTailCall,
 339                                   SDOperand Callee, ArgListTy &Args,
 340                                   SelectionDAG &DAG) {
 341   // Count how many bytes are to be pushed on the stack.
 342   unsigned NumBytes = 0;
 343
 344   if (Args.empty()) {
 345     // Save zero bytes.
 346     Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
 347                         DAG.getConstant(0, getPointerTy()));
 348   } else {
 349     for (unsigned i = 0, e = Args.size(); i != e; ++i)
 350       switch (getValueType(Args[i].second)) {
 351       default: assert(0 && "Unknown value type!");
 352       case MVT::i1:
 353       case MVT::i8:
 354       case MVT::i16:
 355       case MVT::i32:
 356       case MVT::f32:
 357         NumBytes += 4;
 358         break;
 359       case MVT::i64:
 360       case MVT::f64:
 361         NumBytes += 8;
 362         break;
 363       }
 364
 365     Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
 366                         DAG.getConstant(NumBytes, getPointerTy()));
 367
 368     // Arguments go on the stack in reverse order, as specified by the ABI.
 369     unsigned ArgOffset = 0;
 370     SDOperand StackPtr = DAG.getCopyFromReg(X86::ESP, MVT::i32,
 371                                             DAG.getEntryNode());
 372     std::vector<SDOperand> Stores;
 373
 374     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
 375       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
 376       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
 377
 378       switch (getValueType(Args[i].second)) {
 379       default: assert(0 && "Unexpected ValueType for argument!");
 380       case MVT::i1:
 381       case MVT::i8:
 382       case MVT::i16:
 383         // Promote the integer to 32 bits.  If the input type is signed use a
 384         // sign extend, otherwise use a zero extend.
 385         if (Args[i].second->isSigned())
 386           Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
 387         else
 388           Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
 389
 390         // FALL THROUGH
 391       case MVT::i32:
 392       case MVT::f32:
 393         Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
 394                                      Args[i].first, PtrOff,
 395                                      DAG.getSrcValue(NULL)));
 396         ArgOffset += 4;
 397         break;
 398       case MVT::i64:
 399       case MVT::f64:
 400         Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
 401                                      Args[i].first, PtrOff,
 402                                      DAG.getSrcValue(NULL)));
 403         ArgOffset += 8;
 404         break;
 405       }
 406     }
 407     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
 408   }
 409
 410   std::vector<MVT::ValueType> RetVals;
 411   MVT::ValueType RetTyVT = getValueType(RetTy);
 412   RetVals.push_back(MVT::Other);
 413
 414   // The result values produced have to be legal.  Promote the result.
 415   switch (RetTyVT) {
 416   case MVT::isVoid: break;
 417   default:
 418     RetVals.push_back(RetTyVT);
 419     break;
 420   case MVT::i1:
 421   case MVT::i8:
 422   case MVT::i16:
 423     RetVals.push_back(MVT::i32);
 424     break;
 425   case MVT::f32:
 426     if (X86ScalarSSE)
 427       RetVals.push_back(MVT::f32);
 428     else
 429       RetVals.push_back(MVT::f64);
 430     break;
 431   case MVT::i64:
 432     RetVals.push_back(MVT::i32);
 433     RetVals.push_back(MVT::i32);
 434     break;
 435   }
 436   std::vector<SDOperand> Ops;
 437   Ops.push_back(Chain);
 438   Ops.push_back(Callee);
 439   Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
 440   Ops.push_back(DAG.getConstant(0, getPointerTy()));
 441   SDOperand TheCall = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
 442                                   RetVals, Ops);
 443   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, TheCall);
 444
 445   SDOperand ResultVal;
 446   switch (RetTyVT) {
 447   case MVT::isVoid: break;
 448   default:
 449     ResultVal = TheCall.getValue(1);
 450     break;
 451   case MVT::i1:
 452   case MVT::i8:
 453   case MVT::i16:
 454     ResultVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, TheCall.getValue(1));
 455     break;
 456   case MVT::f32:
 457     // FIXME: we would really like to remember that this FP_ROUND operation is
 458     // okay to eliminate if we allow excess FP precision.
 459     ResultVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, TheCall.getValue(1));
 460     break;
 461   case MVT::i64:
 462     ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, TheCall.getValue(1),
 463                             TheCall.getValue(2));
 464     break;
 465   }
 466
 467   return std::make_pair(ResultVal, Chain);
 468 }
 469
 470 SDOperand
 471 X86TargetLowering::LowerVAStart(SDOperand Chain, SDOperand VAListP,
 472                                 Value *VAListV, SelectionDAG &DAG) {
 473   // vastart just stores the address of the VarArgsFrameIndex slot.
 474   SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
 475   return DAG.getNode(ISD::STORE, MVT::Other, Chain, FR, VAListP,
 476                      DAG.getSrcValue(VAListV));
 477 }
 478
 479
 480 std::pair<SDOperand,SDOperand>
 481 X86TargetLowering::LowerVAArg(SDOperand Chain, SDOperand VAListP,
 482                               Value *VAListV, const Type *ArgTy,
 483                               SelectionDAG &DAG) {
 484   MVT::ValueType ArgVT = getValueType(ArgTy);
 485   SDOperand Val = DAG.getLoad(MVT::i32, Chain,
 486                               VAListP, DAG.getSrcValue(VAListV));
 487   SDOperand Result = DAG.getLoad(ArgVT, Chain, Val,
 488                                  DAG.getSrcValue(NULL));
 489   unsigned Amt;
 490   if (ArgVT == MVT::i32)
 491     Amt = 4;
 492   else {
 493     assert((ArgVT == MVT::i64 || ArgVT == MVT::f64) &&
 494            "Other types should have been promoted for varargs!");
 495     Amt = 8;
 496   }
 497   Val = DAG.getNode(ISD::ADD, Val.getValueType(), Val,
 498                     DAG.getConstant(Amt, Val.getValueType()));
 499   Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain,
 500                       Val, VAListP, DAG.getSrcValue(VAListV));
 501   return std::make_pair(Result, Chain);
 502 }
 503
 504 //===----------------------------------------------------------------------===//
 505 //                    Fast Calling Convention implementation
 506 //===----------------------------------------------------------------------===//
 507 //
 508 // The X86 'fast' calling convention passes up to two integer arguments in
 509 // registers (an appropriate portion of EAX/EDX), passes arguments in C order,
 510 // and requires that the callee pop its arguments off the stack (allowing proper
 511 // tail calls), and has the same return value conventions as C calling convs.
 512 //
 513 // This calling convention always arranges for the callee pop value to be 8n+4
 514 // bytes, which is needed for tail recursion elimination and stack alignment
 515 // reasons.
 516 //
 517 // Note that this can be enhanced in the future to pass fp vals in registers
 518 // (when we have a global fp allocator) and do other tricks.
 519 //
 520
 521 /// AddLiveIn - This helper function adds the specified physical register to the
 522 /// MachineFunction as a live in value.  It also creates a corresponding virtual
 523 /// register for it.
 524 static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
 525                           TargetRegisterClass *RC) {
 526   assert(RC->contains(PReg) && "Not the correct regclass!");
 527   unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
 528   MF.addLiveIn(PReg, VReg);
 529   return VReg;
 530 }
 531
 532
 533 std::vector<SDOperand>
 534 X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
 535   std::vector<SDOperand> ArgValues;
 536
 537   MachineFunction &MF = DAG.getMachineFunction();
 538   MachineFrameInfo *MFI = MF.getFrameInfo();
 539
 540   // Add DAG nodes to load the arguments...  On entry to a function the stack
 541   // frame looks like this:
 542   //
 543   // [ESP] -- return address
 544   // [ESP + 4] -- first nonreg argument (leftmost lexically)
 545   // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
 546   //    ...
 547   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
 548
 549   // Keep track of the number of integer regs passed so far.  This can be either
 550   // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
 551   // used).
 552   unsigned NumIntRegs = 0;
 553
 554   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
 555     MVT::ValueType ObjectVT = getValueType(I->getType());
 556     unsigned ArgIncrement = 4;
 557     unsigned ObjSize = 0;
 558     SDOperand ArgValue;
 559
 560     switch (ObjectVT) {
 561     default: assert(0 && "Unhandled argument type!");
 562     case MVT::i1:
 563     case MVT::i8:
 564       if (NumIntRegs < 2) {
 565         if (!I->use_empty()) {
 566           unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
 567                                     X86::R8RegisterClass);
 568           ArgValue = DAG.getCopyFromReg(VReg, MVT::i8, DAG.getRoot());
 569           DAG.setRoot(ArgValue.getValue(1));
 570         }
 571         ++NumIntRegs;
 572         break;
 573       }
 574
 575       ObjSize = 1;
 576       break;
 577     case MVT::i16:
 578       if (NumIntRegs < 2) {
 579         if (!I->use_empty()) {
 580           unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
 581                                     X86::R16RegisterClass);
 582           ArgValue = DAG.getCopyFromReg(VReg, MVT::i16, DAG.getRoot());
 583           DAG.setRoot(ArgValue.getValue(1));
 584         }
 585         ++NumIntRegs;
 586         break;
 587       }
 588       ObjSize = 2;
 589       break;
 590     case MVT::i32:
 591       if (NumIntRegs < 2) {
 592         if (!I->use_empty()) {
 593           unsigned VReg = AddLiveIn(MF,NumIntRegs ? X86::EDX : X86::EAX,
 594                                     X86::R32RegisterClass);
 595           ArgValue = DAG.getCopyFromReg(VReg, MVT::i32, DAG.getRoot());
 596           DAG.setRoot(ArgValue.getValue(1));
 597         }
 598         ++NumIntRegs;
 599         break;
 600       }
 601       ObjSize = 4;
 602       break;
 603     case MVT::i64:
 604       if (NumIntRegs == 0) {
 605         if (!I->use_empty()) {
 606           unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
 607           unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
 608
 609           SDOperand Low=DAG.getCopyFromReg(BotReg, MVT::i32, DAG.getRoot());
 610           SDOperand Hi =DAG.getCopyFromReg(TopReg, MVT::i32, Low.getValue(1));
 611           DAG.setRoot(Hi.getValue(1));
 612
 613           ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
 614         }
 615         NumIntRegs = 2;
 616         break;
 617       } else if (NumIntRegs == 1) {
 618         if (!I->use_empty()) {
 619           unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
 620           SDOperand Low = DAG.getCopyFromReg(BotReg, MVT::i32, DAG.getRoot());
 621           DAG.setRoot(Low.getValue(1));
 622
 623           // Load the high part from memory.
 624           // Create the frame index object for this incoming parameter...
 625           int FI = MFI->CreateFixedObject(4, ArgOffset);
 626           SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
 627           SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
 628                                      DAG.getSrcValue(NULL));
 629           ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
 630         }
 631         ArgOffset += 4;
 632         NumIntRegs = 2;
 633         break;
 634       }
 635       ObjSize = ArgIncrement = 8;
 636       break;
 637     case MVT::f32: ObjSize = 4;                break;
 638     case MVT::f64: ObjSize = ArgIncrement = 8; break;
 639     }
 640
 641     // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
 642     // dead loads.
 643     if (ObjSize && !I->use_empty()) {
 644       // Create the frame index object for this incoming parameter...
 645       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
 646
 647       // Create the SelectionDAG nodes corresponding to a load from this
 648       // parameter.
 649       SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
 650
 651       ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
 652                              DAG.getSrcValue(NULL));
 653     } else if (ArgValue.Val == 0) {
 654       if (MVT::isInteger(ObjectVT))
 655         ArgValue = DAG.getConstant(0, ObjectVT);
 656       else
 657         ArgValue = DAG.getConstantFP(0, ObjectVT);
 658     }
 659     ArgValues.push_back(ArgValue);
 660
 661     if (ObjSize)
 662       ArgOffset += ArgIncrement;   // Move on to the next argument.
 663   }
 664
 665   // Make sure the instruction takes 8n+4 bytes to make sure the start of the
 666   // arguments and the arguments after the retaddr has been pushed are aligned.
 667   if ((ArgOffset & 7) == 0)
 668     ArgOffset += 4;
 669
 670   VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
 671   ReturnAddrIndex = 0;             // No return address slot generated yet.
 672   BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
 673   BytesCallerReserves = 0;
 674
 675   // Finally, inform the code generator which regs we return values in.
 676   switch (getValueType(F.getReturnType())) {
 677   default: assert(0 && "Unknown type!");
 678   case MVT::isVoid: break;
 679   case MVT::i1:
 680   case MVT::i8:
 681   case MVT::i16:
 682   case MVT::i32:
 683     MF.addLiveOut(X86::EAX);
 684     break;
 685   case MVT::i64:
 686     MF.addLiveOut(X86::EAX);
 687     MF.addLiveOut(X86::EDX);
 688     break;
 689   case MVT::f32:
 690   case MVT::f64:
 691     MF.addLiveOut(X86::ST0);
 692     break;
 693   }
 694   return ArgValues;
 695 }
 696
 697 std::pair<SDOperand, SDOperand>
 698 X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
 699                                      bool isTailCall, SDOperand Callee,
 700                                      ArgListTy &Args, SelectionDAG &DAG) {
 701   // Count how many bytes are to be pushed on the stack.
 702   unsigned NumBytes = 0;
 703
 704   // Keep track of the number of integer regs passed so far.  This can be either
 705   // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
 706   // used).
 707   unsigned NumIntRegs = 0;
 708
 709   for (unsigned i = 0, e = Args.size(); i != e; ++i)
 710     switch (getValueType(Args[i].second)) {
 711     default: assert(0 && "Unknown value type!");
 712     case MVT::i1:
 713     case MVT::i8:
 714     case MVT::i16:
 715     case MVT::i32:
 716       if (NumIntRegs < 2) {
 717         ++NumIntRegs;
 718         break;
 719       }
 720       // fall through
 721     case MVT::f32:
 722       NumBytes += 4;
 723       break;
 724     case MVT::i64:
 725       if (NumIntRegs == 0) {
 726         NumIntRegs = 2;
 727         break;
 728       } else if (NumIntRegs == 1) {
 729         NumIntRegs = 2;
 730         NumBytes += 4;
 731         break;
 732       }
 733
 734       // fall through
 735     case MVT::f64:
 736       NumBytes += 8;
 737       break;
 738     }
 739
 740   // Make sure the instruction takes 8n+4 bytes to make sure the start of the
 741   // arguments and the arguments after the retaddr has been pushed are aligned.
 742   if ((NumBytes & 7) == 0)
 743     NumBytes += 4;
 744
 745   Chain = DAG.getNode(ISD::CALLSEQ_START, MVT::Other, Chain,
 746                       DAG.getConstant(NumBytes, getPointerTy()));
 747
 748   // Arguments go on the stack in reverse order, as specified by the ABI.
 749   unsigned ArgOffset = 0;
 750   SDOperand StackPtr = DAG.getCopyFromReg(X86::ESP, MVT::i32,
 751                                           DAG.getEntryNode());
 752   NumIntRegs = 0;
 753   std::vector<SDOperand> Stores;
 754   std::vector<SDOperand> RegValuesToPass;
 755   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
 756     switch (getValueType(Args[i].second)) {
 757     default: assert(0 && "Unexpected ValueType for argument!");
 758     case MVT::i1:
 759     case MVT::i8:
 760     case MVT::i16:
 761     case MVT::i32:
 762       if (NumIntRegs < 2) {
 763         RegValuesToPass.push_back(Args[i].first);
 764         ++NumIntRegs;
 765         break;
 766       }
 767       // Fall through
 768     case MVT::f32: {
 769       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
 770       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
 771       Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
 772                                    Args[i].first, PtrOff,
 773                                    DAG.getSrcValue(NULL)));
 774       ArgOffset += 4;
 775       break;
 776     }
 777     case MVT::i64:
 778       if (NumIntRegs < 2) {    // Can pass part of it in regs?
 779         SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
 780                                    Args[i].first, DAG.getConstant(1, MVT::i32));
 781         SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
 782                                    Args[i].first, DAG.getConstant(0, MVT::i32));
 783         RegValuesToPass.push_back(Lo);
 784         ++NumIntRegs;
 785         if (NumIntRegs < 2) {   // Pass both parts in regs?
 786           RegValuesToPass.push_back(Hi);
 787           ++NumIntRegs;
 788         } else {
 789           // Pass the high part in memory.
 790           SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
 791           PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
 792           Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
 793                                        Hi, PtrOff, DAG.getSrcValue(NULL)));
 794           ArgOffset += 4;
 795         }
 796         break;
 797       }
 798       // Fall through
 799     case MVT::f64:
 800       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
 801       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
 802       Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
 803                                    Args[i].first, PtrOff,
 804                                    DAG.getSrcValue(NULL)));
 805       ArgOffset += 8;
 806       break;
 807     }
 808   }
 809   if (!Stores.empty())
 810     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
 811
 812   // Make sure the instruction takes 8n+4 bytes to make sure the start of the
 813   // arguments and the arguments after the retaddr has been pushed are aligned.
 814   if ((ArgOffset & 7) == 0)
 815     ArgOffset += 4;
 816
 817   std::vector<MVT::ValueType> RetVals;
 818   MVT::ValueType RetTyVT = getValueType(RetTy);
 819
 820   RetVals.push_back(MVT::Other);
 821
 822   // The result values produced have to be legal.  Promote the result.
 823   switch (RetTyVT) {
 824   case MVT::isVoid: break;
 825   default:
 826     RetVals.push_back(RetTyVT);
 827     break;
 828   case MVT::i1:
 829   case MVT::i8:
 830   case MVT::i16:
 831     RetVals.push_back(MVT::i32);
 832     break;
 833   case MVT::f32:
 834     if (X86ScalarSSE)
 835       RetVals.push_back(MVT::f32);
 836     else
 837       RetVals.push_back(MVT::f64);
 838     break;
 839   case MVT::i64:
 840     RetVals.push_back(MVT::i32);
 841     RetVals.push_back(MVT::i32);
 842     break;
 843   }
 844
 845   std::vector<SDOperand> Ops;
 846   Ops.push_back(Chain);
 847   Ops.push_back(Callee);
 848   Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
 849   // Callee pops all arg values on the stack.
 850   Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
 851
 852   // Pass register arguments as needed.
 853   Ops.insert(Ops.end(), RegValuesToPass.begin(), RegValuesToPass.end());
 854
 855   SDOperand TheCall = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
 856                                   RetVals, Ops);
 857   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, TheCall);
 858
 859   SDOperand ResultVal;
 860   switch (RetTyVT) {
 861   case MVT::isVoid: break;
 862   default:
 863     ResultVal = TheCall.getValue(1);
 864     break;
 865   case MVT::i1:
 866   case MVT::i8:
 867   case MVT::i16:
 868     ResultVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, TheCall.getValue(1));
 869     break;
 870   case MVT::f32:
 871     // FIXME: we would really like to remember that this FP_ROUND operation is
 872     // okay to eliminate if we allow excess FP precision.
 873     ResultVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, TheCall.getValue(1));
 874     break;
 875   case MVT::i64:
 876     ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, TheCall.getValue(1),
 877                             TheCall.getValue(2));
 878     break;
 879   }
 880
 881   return std::make_pair(ResultVal, Chain);
 882 }
 883
 884 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
 885   if (ReturnAddrIndex == 0) {
 886     // Set up a frame object for the return address.
 887     MachineFunction &MF = DAG.getMachineFunction();
 888     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
 889   }
 890
 891   return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
 892 }
 893
 894
 895
 896 std::pair<SDOperand, SDOperand> X86TargetLowering::
 897 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
 898                         SelectionDAG &DAG) {
 899   SDOperand Result;
 900   if (Depth)        // Depths > 0 not supported yet!
 901     Result = DAG.getConstant(0, getPointerTy());
 902   else {
 903     SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
 904     if (!isFrameAddress)
 905       // Just load the return address
 906       Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
 907                            DAG.getSrcValue(NULL));
 908     else
 909       Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
 910                            DAG.getConstant(4, MVT::i32));
 911   }
 912   return std::make_pair(Result, Chain);
 913 }
 914
 915 /// LowerOperation - Provide custom lowering hooks for some operations.
 916 ///
 917 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
 918   switch (Op.getOpcode()) {
 919   default: assert(0 && "Should not custom lower this!");
 920   case ISD::SINT_TO_FP:
 921     assert(Op.getValueType() == MVT::f64 &&
 922            Op.getOperand(0).getValueType() == MVT::i64 &&
 923            "Unknown SINT_TO_FP to lower!");
 924     // We lower sint64->FP into a store to a temporary stack slot, followed by a
 925     // FILD64m node.
 926     MachineFunction &MF = DAG.getMachineFunction();
 927     int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
 928     SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
 929     SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
 930                            Op.getOperand(0), StackSlot, DAG.getSrcValue(NULL));
 931     std::vector<MVT::ValueType> RTs;
 932     RTs.push_back(MVT::f64);
 933     RTs.push_back(MVT::Other);
 934     std::vector<SDOperand> Ops;
 935     Ops.push_back(Store);
 936     Ops.push_back(StackSlot);
 937     return DAG.getNode(X86ISD::FILD64m, RTs, Ops);
 938   }
 939 }
 940
 941
 942 //===----------------------------------------------------------------------===//
 943 //                      Pattern Matcher Implementation
 944 //===----------------------------------------------------------------------===//
 945
 946 namespace {
 947   /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
 948   /// SDOperand's instead of register numbers for the leaves of the matched
 949   /// tree.
 950   struct X86ISelAddressMode {
 951     enum {
 952       RegBase,
 953       FrameIndexBase,
 954     } BaseType;
 955
 956     struct {            // This is really a union, discriminated by BaseType!
 957       SDOperand Reg;
 958       int FrameIndex;
 959     } Base;
 960
 961     unsigned Scale;
 962     SDOperand IndexReg;
 963     unsigned Disp;
 964     GlobalValue *GV;
 965
 966     X86ISelAddressMode()
 967       : BaseType(RegBase), Scale(1), IndexReg(), Disp(), GV(0) {
 968     }
 969   };
 970 }
 971
 972
 973 namespace {
 974   Statistic<>
 975   NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
 976
 977   //===--------------------------------------------------------------------===//
 978   /// ISel - X86 specific code to select X86 machine instructions for
 979   /// SelectionDAG operations.
 980   ///
 981   class ISel : public SelectionDAGISel {
 982     /// ContainsFPCode - Every instruction we select that uses or defines a FP
 983     /// register should set this to true.
 984     bool ContainsFPCode;
 985
 986     /// X86Lowering - This object fully describes how to lower LLVM code to an
 987     /// X86-specific SelectionDAG.
 988     X86TargetLowering X86Lowering;
 989
 990     /// RegPressureMap - This keeps an approximate count of the number of
 991     /// registers required to evaluate each node in the graph.
 992     std::map<SDNode*, unsigned> RegPressureMap;
 993
 994     /// ExprMap - As shared expressions are codegen'd, we keep track of which
 995     /// vreg the value is produced in, so we only emit one copy of each compiled
 996     /// tree.
 997     std::map<SDOperand, unsigned> ExprMap;
 998
 999     /// TheDAG - The DAG being selected during Select* operations.
1000     SelectionDAG *TheDAG;
1001
1002     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
1003     /// make the right decision when generating code for different targets.
1004     const X86Subtarget *Subtarget;
1005   public:
1006     ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), X86Lowering(TM) {
1007       Subtarget = TM.getSubtarget<const X86Subtarget>();
1008     }
1009
1010     virtual const char *getPassName() const {
1011       return "X86 Pattern Instruction Selection";
1012     }
1013
1014     unsigned getRegPressure(SDOperand O) {
1015       return RegPressureMap[O.Val];
1016     }
1017     unsigned ComputeRegPressure(SDOperand O);
1018
1019     /// InstructionSelectBasicBlock - This callback is invoked by
1020     /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
1021     virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
1022
1023     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
1024
1025     bool isFoldableLoad(SDOperand Op, SDOperand OtherOp,
1026                         bool FloatPromoteOk = false);
1027     void EmitFoldedLoad(SDOperand Op, X86AddressMode &AM);
1028     bool TryToFoldLoadOpStore(SDNode *Node);
1029     bool EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg);
1030     void EmitCMP(SDOperand LHS, SDOperand RHS, bool isOnlyUse);
1031     bool EmitBranchCC(MachineBasicBlock *Dest, SDOperand Chain, SDOperand Cond);
1032     void EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
1033                       unsigned RTrue, unsigned RFalse, unsigned RDest);
1034     unsigned SelectExpr(SDOperand N);
1035
1036     X86AddressMode SelectAddrExprs(const X86ISelAddressMode &IAM);
1037     bool MatchAddress(SDOperand N, X86ISelAddressMode &AM);
1038     void SelectAddress(SDOperand N, X86AddressMode &AM);
1039     bool EmitPotentialTailCall(SDNode *Node);
1040     void EmitFastCCToFastCCTailCall(SDNode *TailCallNode);
1041     void Select(SDOperand N);
1042   };
1043 }
1044
1045 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
1046 /// the main function.
1047 static void EmitSpecialCodeForMain(MachineBasicBlock *BB,
1048                                    MachineFrameInfo *MFI) {
1049   // Switch the FPU to 64-bit precision mode for better compatibility and speed.
1050   int CWFrameIdx = MFI->CreateStackObject(2, 2);
1051   addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
1052
1053   // Set the high part to be 64-bit precision.
1054   addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
1055                     CWFrameIdx, 1).addImm(2);
1056
1057   // Reload the modified control word now.
1058   addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1059 }
1060
1061 void ISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
1062   // If this function has live-in values, emit the copies from pregs to vregs at
1063   // the top of the function, before anything else.
1064   MachineBasicBlock *BB = MF.begin();
1065   if (MF.livein_begin() != MF.livein_end()) {
1066     SSARegMap *RegMap = MF.getSSARegMap();
1067     for (MachineFunction::livein_iterator LI = MF.livein_begin(),
1068          E = MF.livein_end(); LI != E; ++LI) {
1069       const TargetRegisterClass *RC = RegMap->getRegClass(LI->second);
1070       if (RC == X86::R8RegisterClass) {
1071         BuildMI(BB, X86::MOV8rr, 1, LI->second).addReg(LI->first);
1072       } else if (RC == X86::R16RegisterClass) {
1073         BuildMI(BB, X86::MOV16rr, 1, LI->second).addReg(LI->first);
1074       } else if (RC == X86::R32RegisterClass) {
1075         BuildMI(BB, X86::MOV32rr, 1, LI->second).addReg(LI->first);
1076       } else if (RC == X86::RFPRegisterClass) {
1077         BuildMI(BB, X86::FpMOV, 1, LI->second).addReg(LI->first);
1078       } else if (RC == X86::RXMMRegisterClass) {
1079         BuildMI(BB, X86::MOVAPDrr, 1, LI->second).addReg(LI->first);
1080       } else {
1081         assert(0 && "Unknown regclass!");
1082       }
1083     }
1084   }
1085
1086
1087   // If this is main, emit special code for main.
1088   if (Fn.hasExternalLinkage() && Fn.getName() == "main")
1089     EmitSpecialCodeForMain(BB, MF.getFrameInfo());
1090 }
1091
1092
1093 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
1094 /// when it has created a SelectionDAG for us to codegen.
1095 void ISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
1096   // While we're doing this, keep track of whether we see any FP code for
1097   // FP_REG_KILL insertion.
1098   ContainsFPCode = false;
1099   MachineFunction *MF = BB->getParent();
1100
1101   // Scan the PHI nodes that already are inserted into this basic block.  If any
1102   // of them is a PHI of a floating point value, we need to insert an
1103   // FP_REG_KILL.
1104   SSARegMap *RegMap = MF->getSSARegMap();
1105   if (BB != MF->begin())
1106     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
1107          I != E; ++I) {
1108       assert(I->getOpcode() == X86::PHI &&
1109              "Isn't just PHI nodes?");
1110       if (RegMap->getRegClass(I->getOperand(0).getReg()) ==
1111           X86::RFPRegisterClass) {
1112         ContainsFPCode = true;
1113         break;
1114       }
1115     }
1116
1117   // Compute the RegPressureMap, which is an approximation for the number of
1118   // registers required to compute each node.
1119   ComputeRegPressure(DAG.getRoot());
1120
1121   TheDAG = &DAG;
1122
1123   // Codegen the basic block.
1124   Select(DAG.getRoot());
1125
1126   TheDAG = 0;
1127
1128   // Finally, look at all of the successors of this block.  If any contain a PHI
1129   // node of FP type, we need to insert an FP_REG_KILL in this block.
1130   for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
1131          E = BB->succ_end(); SI != E && !ContainsFPCode; ++SI)
1132     for (MachineBasicBlock::iterator I = (*SI)->begin(), E = (*SI)->end();
1133          I != E && I->getOpcode() == X86::PHI; ++I) {
1134       if (RegMap->getRegClass(I->getOperand(0).getReg()) ==
1135           X86::RFPRegisterClass) {
1136         ContainsFPCode = true;
1137         break;
1138       }
1139     }
1140
1141   // Final check, check LLVM BB's that are successors to the LLVM BB
1142   // corresponding to BB for FP PHI nodes.
1143   const BasicBlock *LLVMBB = BB->getBasicBlock();
1144   const PHINode *PN;
1145   if (!ContainsFPCode)
1146     for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
1147          SI != E && !ContainsFPCode; ++SI)
1148       for (BasicBlock::const_iterator II = SI->begin();
1149            (PN = dyn_cast<PHINode>(II)); ++II)
1150         if (PN->getType()->isFloatingPoint()) {
1151           ContainsFPCode = true;
1152           break;
1153         }
1154
1155
1156   // Insert FP_REG_KILL instructions into basic blocks that need them.  This
1157   // only occurs due to the floating point stackifier not being aggressive
1158   // enough to handle arbitrary global stackification.
1159   //
1160   // Currently we insert an FP_REG_KILL instruction into each block that uses or
1161   // defines a floating point virtual register.
1162   //
1163   // When the global register allocators (like linear scan) finally update live
1164   // variable analysis, we can keep floating point values in registers across
1165   // basic blocks.  This will be a huge win, but we are waiting on the global
1166   // allocators before we can do this.
1167   //
1168   if (ContainsFPCode) {
1169     BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
1170     ++NumFPKill;
1171   }
1172
1173   // Clear state used for selection.
1174   ExprMap.clear();
1175   RegPressureMap.clear();
1176 }
1177
1178
1179 // ComputeRegPressure - Compute the RegPressureMap, which is an approximation
1180 // for the number of registers required to compute each node.  This is basically
1181 // computing a generalized form of the Sethi-Ullman number for each node.
1182 unsigned ISel::ComputeRegPressure(SDOperand O) {
1183   SDNode *N = O.Val;
1184   unsigned &Result = RegPressureMap[N];
1185   if (Result) return Result;
1186
1187   // FIXME: Should operations like CALL (which clobber lots o regs) have a
1188   // higher fixed cost??
1189
1190   if (N->getNumOperands() == 0) {
1191     Result = 1;
1192   } else {
1193     unsigned MaxRegUse = 0;
1194     unsigned NumExtraMaxRegUsers = 0;
1195     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1196       unsigned Regs;
1197       if (N->getOperand(i).getOpcode() == ISD::Constant)
1198         Regs = 0;
1199       else
1200         Regs = ComputeRegPressure(N->getOperand(i));
1201       if (Regs > MaxRegUse) {
1202         MaxRegUse = Regs;
1203         NumExtraMaxRegUsers = 0;
1204       } else if (Regs == MaxRegUse &&
1205                  N->getOperand(i).getValueType() != MVT::Other) {
1206         ++NumExtraMaxRegUsers;
1207       }
1208     }
1209
1210     if (O.getOpcode() != ISD::TokenFactor)
1211       Result = MaxRegUse+NumExtraMaxRegUsers;
1212     else
1213       Result = MaxRegUse == 1 ? 0 : MaxRegUse-1;
1214   }
1215
1216   //std::cerr << " WEIGHT: " << Result << " ";  N->dump(); std::cerr << "\n";
1217   return Result;
1218 }
1219
1220 /// NodeTransitivelyUsesValue - Return true if N or any of its uses uses Op.
1221 /// The DAG cannot have cycles in it, by definition, so the visited set is not
1222 /// needed to prevent infinite loops.  The DAG CAN, however, have unbounded
1223 /// reuse, so it prevents exponential cases.
1224 ///
1225 static bool NodeTransitivelyUsesValue(SDOperand N, SDOperand Op,
1226                                       std::set<SDNode*> &Visited) {
1227   if (N == Op) return true;                        // Found it.
1228   SDNode *Node = N.Val;
1229   if (Node->getNumOperands() == 0 ||      // Leaf?
1230       Node->getNodeDepth() <= Op.getNodeDepth()) return false; // Can't find it?
1231   if (!Visited.insert(Node).second) return false;  // Already visited?
1232
1233   // Recurse for the first N-1 operands.
1234   for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
1235     if (NodeTransitivelyUsesValue(Node->getOperand(i), Op, Visited))
1236       return true;
1237
1238   // Tail recurse for the last operand.
1239   return NodeTransitivelyUsesValue(Node->getOperand(0), Op, Visited);
1240 }
1241
1242 X86AddressMode ISel::SelectAddrExprs(const X86ISelAddressMode &IAM) {
1243   X86AddressMode Result;
1244
1245   // If we need to emit two register operands, emit the one with the highest
1246   // register pressure first.
1247   if (IAM.BaseType == X86ISelAddressMode::RegBase &&
1248       IAM.Base.Reg.Val && IAM.IndexReg.Val) {
1249     bool EmitBaseThenIndex;
1250     if (getRegPressure(IAM.Base.Reg) > getRegPressure(IAM.IndexReg)) {
1251       std::set<SDNode*> Visited;
1252       EmitBaseThenIndex = true;
1253       // If Base ends up pointing to Index, we must emit index first.  This is
1254       // because of the way we fold loads, we may end up doing bad things with
1255       // the folded add.
1256       if (NodeTransitivelyUsesValue(IAM.Base.Reg, IAM.IndexReg, Visited))
1257         EmitBaseThenIndex = false;
1258     } else {
1259       std::set<SDNode*> Visited;
1260       EmitBaseThenIndex = false;
1261       // If Base ends up pointing to Index, we must emit index first.  This is
1262       // because of the way we fold loads, we may end up doing bad things with
1263       // the folded add.
1264       if (NodeTransitivelyUsesValue(IAM.IndexReg, IAM.Base.Reg, Visited))
1265         EmitBaseThenIndex = true;
1266     }
1267
1268     if (EmitBaseThenIndex) {
1269       Result.Base.Reg = SelectExpr(IAM.Base.Reg);
1270       Result.IndexReg = SelectExpr(IAM.IndexReg);
1271     } else {
1272       Result.IndexReg = SelectExpr(IAM.IndexReg);
1273       Result.Base.Reg = SelectExpr(IAM.Base.Reg);
1274     }
1275
1276   } else if (IAM.BaseType == X86ISelAddressMode::RegBase && IAM.Base.Reg.Val) {
1277     Result.Base.Reg = SelectExpr(IAM.Base.Reg);
1278   } else if (IAM.IndexReg.Val) {
1279     Result.IndexReg = SelectExpr(IAM.IndexReg);
1280   }
1281
1282   switch (IAM.BaseType) {
1283   case X86ISelAddressMode::RegBase:
1284     Result.BaseType = X86AddressMode::RegBase;
1285     break;
1286   case X86ISelAddressMode::FrameIndexBase:
1287     Result.BaseType = X86AddressMode::FrameIndexBase;
1288     Result.Base.FrameIndex = IAM.Base.FrameIndex;
1289     break;
1290   default:
1291     assert(0 && "Unknown base type!");
1292     break;
1293   }
1294   Result.Scale = IAM.Scale;
1295   Result.Disp = IAM.Disp;
1296   Result.GV = IAM.GV;
1297   return Result;
1298 }
1299
1300 /// SelectAddress - Pattern match the maximal addressing mode for this node and
1301 /// emit all of the leaf registers.
1302 void ISel::SelectAddress(SDOperand N, X86AddressMode &AM) {
1303   X86ISelAddressMode IAM;
1304   MatchAddress(N, IAM);
1305   AM = SelectAddrExprs(IAM);
1306 }
1307
1308 /// MatchAddress - Add the specified node to the specified addressing mode,
1309 /// returning true if it cannot be done.  This just pattern matches for the
1310 /// addressing mode, it does not cause any code to be emitted.  For that, use
1311 /// SelectAddress.
1312 bool ISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM) {
1313   switch (N.getOpcode()) {
1314   default: break;
1315   case ISD::FrameIndex:
1316     if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
1317       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1318       AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1319       return false;
1320     }
1321     break;
1322   case ISD::GlobalAddress:
1323     if (AM.GV == 0) {
1324       GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
1325       // For Darwin, external and weak symbols are indirect, so we want to load
1326       // the value at address GV, not the value of GV itself.  This means that
1327       // the GlobalAddress must be in the base or index register of the address,
1328       // not the GV offset field.
1329       if (Subtarget->getIndirectExternAndWeakGlobals() &&
1330           (GV->hasWeakLinkage() || GV->isExternal())) {
1331         break;
1332       } else {
1333         AM.GV = GV;
1334         return false;
1335       }
1336     }
1337     break;
1338   case ISD::Constant:
1339     AM.Disp += cast<ConstantSDNode>(N)->getValue();
1340     return false;
1341   case ISD::SHL:
1342     // We might have folded the load into this shift, so don't regen the value
1343     // if so.
1344     if (ExprMap.count(N)) break;
1345
1346     if (AM.IndexReg.Val == 0 && AM.Scale == 1)
1347       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
1348         unsigned Val = CN->getValue();
1349         if (Val == 1 || Val == 2 || Val == 3) {
1350           AM.Scale = 1 << Val;
1351           SDOperand ShVal = N.Val->getOperand(0);
1352
1353           // Okay, we know that we have a scale by now.  However, if the scaled
1354           // value is an add of something and a constant, we can fold the
1355           // constant into the disp field here.
1356           if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
1357               isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
1358             AM.IndexReg = ShVal.Val->getOperand(0);
1359             ConstantSDNode *AddVal =
1360               cast<ConstantSDNode>(ShVal.Val->getOperand(1));
1361             AM.Disp += AddVal->getValue() << Val;
1362           } else {
1363             AM.IndexReg = ShVal;
1364           }
1365           return false;
1366         }
1367       }
1368     break;
1369   case ISD::MUL:
1370     // We might have folded the load into this mul, so don't regen the value if
1371     // so.
1372     if (ExprMap.count(N)) break;
1373
1374     // X*[3,5,9] -> X+X*[2,4,8]
1375     if (AM.IndexReg.Val == 0 && AM.BaseType == X86ISelAddressMode::RegBase &&
1376         AM.Base.Reg.Val == 0)
1377       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
1378         if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
1379           AM.Scale = unsigned(CN->getValue())-1;
1380
1381           SDOperand MulVal = N.Val->getOperand(0);
1382           SDOperand Reg;
1383
1384           // Okay, we know that we have a scale by now.  However, if the scaled
1385           // value is an add of something and a constant, we can fold the
1386           // constant into the disp field here.
1387           if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1388               isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
1389             Reg = MulVal.Val->getOperand(0);
1390             ConstantSDNode *AddVal =
1391               cast<ConstantSDNode>(MulVal.Val->getOperand(1));
1392             AM.Disp += AddVal->getValue() * CN->getValue();
1393           } else {
1394             Reg = N.Val->getOperand(0);
1395           }
1396
1397           AM.IndexReg = AM.Base.Reg = Reg;
1398           return false;
1399         }
1400     break;
1401
1402   case ISD::ADD: {
1403     // We might have folded the load into this mul, so don't regen the value if
1404     // so.
1405     if (ExprMap.count(N)) break;
1406
1407     X86ISelAddressMode Backup = AM;
1408     if (!MatchAddress(N.Val->getOperand(0), AM) &&
1409         !MatchAddress(N.Val->getOperand(1), AM))
1410       return false;
1411     AM = Backup;
1412     if (!MatchAddress(N.Val->getOperand(1), AM) &&
1413         !MatchAddress(N.Val->getOperand(0), AM))
1414       return false;
1415     AM = Backup;
1416     break;
1417   }
1418   }
1419
1420   // Is the base register already occupied?
1421   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
1422     // If so, check to see if the scale index register is set.
1423     if (AM.IndexReg.Val == 0) {
1424       AM.IndexReg = N;
1425       AM.Scale = 1;
1426       return false;
1427     }
1428
1429     // Otherwise, we cannot select it.
1430     return true;
1431   }
1432
1433   // Default, generate it as a register.
1434   AM.BaseType = X86ISelAddressMode::RegBase;
1435   AM.Base.Reg = N;
1436   return false;
1437 }
1438
1439 /// Emit2SetCCsAndLogical - Emit the following sequence of instructions,
1440 /// assuming that the temporary registers are in the 8-bit register class.
1441 ///
1442 ///  Tmp1 = setcc1
1443 ///  Tmp2 = setcc2
1444 ///  DestReg = logicalop Tmp1, Tmp2
1445 ///
1446 static void Emit2SetCCsAndLogical(MachineBasicBlock *BB, unsigned SetCC1,
1447                                   unsigned SetCC2, unsigned LogicalOp,
1448                                   unsigned DestReg) {
1449   SSARegMap *RegMap = BB->getParent()->getSSARegMap();
1450   unsigned Tmp1 = RegMap->createVirtualRegister(X86::R8RegisterClass);
1451   unsigned Tmp2 = RegMap->createVirtualRegister(X86::R8RegisterClass);
1452   BuildMI(BB, SetCC1, 0, Tmp1);
1453   BuildMI(BB, SetCC2, 0, Tmp2);
1454   BuildMI(BB, LogicalOp, 2, DestReg).addReg(Tmp1).addReg(Tmp2);
1455 }
1456
1457 /// EmitSetCC - Emit the code to set the specified 8-bit register to 1 if the
1458 /// condition codes match the specified SetCCOpcode.  Note that some conditions
1459 /// require multiple instructions to generate the correct value.
1460 static void EmitSetCC(MachineBasicBlock *BB, unsigned DestReg,
1461                       ISD::CondCode SetCCOpcode, bool isFP) {
1462   unsigned Opc;
1463   if (!isFP) {
1464     switch (SetCCOpcode) {
1465     default: assert(0 && "Illegal integer SetCC!");
1466     case ISD::SETEQ: Opc = X86::SETEr; break;
1467     case ISD::SETGT: Opc = X86::SETGr; break;
1468     case ISD::SETGE: Opc = X86::SETGEr; break;
1469     case ISD::SETLT: Opc = X86::SETLr; break;
1470     case ISD::SETLE: Opc = X86::SETLEr; break;
1471     case ISD::SETNE: Opc = X86::SETNEr; break;
1472     case ISD::SETULT: Opc = X86::SETBr; break;
1473     case ISD::SETUGT: Opc = X86::SETAr; break;
1474     case ISD::SETULE: Opc = X86::SETBEr; break;
1475     case ISD::SETUGE: Opc = X86::SETAEr; break;
1476     }
1477   } else {
1478     // On a floating point condition, the flags are set as follows:
1479     // ZF  PF  CF   op
1480     //  0 | 0 | 0 | X > Y
1481     //  0 | 0 | 1 | X < Y
1482     //  1 | 0 | 0 | X == Y
1483     //  1 | 1 | 1 | unordered
1484     //
1485     switch (SetCCOpcode) {
1486     default: assert(0 && "Invalid FP setcc!");
1487     case ISD::SETUEQ:
1488     case ISD::SETEQ:
1489       Opc = X86::SETEr;    // True if ZF = 1
1490       break;
1491     case ISD::SETOGT:
1492     case ISD::SETGT:
1493       Opc = X86::SETAr;    // True if CF = 0 and ZF = 0
1494       break;
1495     case ISD::SETOGE:
1496     case ISD::SETGE:
1497       Opc = X86::SETAEr;   // True if CF = 0
1498       break;
1499     case ISD::SETULT:
1500     case ISD::SETLT:
1501       Opc = X86::SETBr;    // True if CF = 1
1502       break;
1503     case ISD::SETULE:
1504     case ISD::SETLE:
1505       Opc = X86::SETBEr;   // True if CF = 1 or ZF = 1
1506       break;
1507     case ISD::SETONE:
1508     case ISD::SETNE:
1509       Opc = X86::SETNEr;   // True if ZF = 0
1510       break;
1511     case ISD::SETUO:
1512       Opc = X86::SETPr;    // True if PF = 1
1513       break;
1514     case ISD::SETO:
1515       Opc = X86::SETNPr;   // True if PF = 0
1516       break;
1517     case ISD::SETOEQ:      // !PF & ZF
1518       Emit2SetCCsAndLogical(BB, X86::SETNPr, X86::SETEr, X86::AND8rr, DestReg);
1519       return;
1520     case ISD::SETOLT:      // !PF & CF
1521       Emit2SetCCsAndLogical(BB, X86::SETNPr, X86::SETBr, X86::AND8rr, DestReg);
1522       return;
1523     case ISD::SETOLE:      // !PF & (CF || ZF)
1524       Emit2SetCCsAndLogical(BB, X86::SETNPr, X86::SETBEr, X86::AND8rr, DestReg);
1525       return;
1526     case ISD::SETUGT:      // PF | (!ZF & !CF)
1527       Emit2SetCCsAndLogical(BB, X86::SETPr, X86::SETAr, X86::OR8rr, DestReg);
1528       return;
1529     case ISD::SETUGE:      // PF | !CF
1530       Emit2SetCCsAndLogical(BB, X86::SETPr, X86::SETAEr, X86::OR8rr, DestReg);
1531       return;
1532     case ISD::SETUNE:      // PF | !ZF
1533       Emit2SetCCsAndLogical(BB, X86::SETPr, X86::SETNEr, X86::OR8rr, DestReg);
1534       return;
1535     }
1536   }
1537   BuildMI(BB, Opc, 0, DestReg);
1538 }
1539
1540
1541 /// EmitBranchCC - Emit code into BB that arranges for control to transfer to
1542 /// the Dest block if the Cond condition is true.  If we cannot fold this
1543 /// condition into the branch, return true.
1544 ///
1545 bool ISel::EmitBranchCC(MachineBasicBlock *Dest, SDOperand Chain,
1546                         SDOperand Cond) {
1547   // FIXME: Evaluate whether it would be good to emit code like (X < Y) | (A >
1548   // B) using two conditional branches instead of one condbr, two setcc's, and
1549   // an or.
1550   if ((Cond.getOpcode() == ISD::OR ||
1551        Cond.getOpcode() == ISD::AND) && Cond.Val->hasOneUse()) {
1552     // And and or set the flags for us, so there is no need to emit a TST of the
1553     // result.  It is only safe to do this if there is only a single use of the
1554     // AND/OR though, otherwise we don't know it will be emitted here.
1555     Select(Chain);
1556     SelectExpr(Cond);
1557     BuildMI(BB, X86::JNE, 1).addMBB(Dest);
1558     return false;
1559   }
1560
1561   // Codegen br not C -> JE.
1562   if (Cond.getOpcode() == ISD::XOR)
1563     if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Cond.Val->getOperand(1)))
1564       if (NC->isAllOnesValue()) {
1565         unsigned CondR;
1566         if (getRegPressure(Chain) > getRegPressure(Cond)) {
1567           Select(Chain);
1568           CondR = SelectExpr(Cond.Val->getOperand(0));
1569         } else {
1570           CondR = SelectExpr(Cond.Val->getOperand(0));
1571           Select(Chain);
1572         }
1573         BuildMI(BB, X86::TEST8rr, 2).addReg(CondR).addReg(CondR);
1574         BuildMI(BB, X86::JE, 1).addMBB(Dest);
1575         return false;
1576       }
1577
1578   SetCCSDNode *SetCC = dyn_cast<SetCCSDNode>(Cond);
1579   if (SetCC == 0)
1580     return true;                       // Can only handle simple setcc's so far.
1581
1582   unsigned Opc;
1583
1584   // Handle integer conditions first.
1585   if (MVT::isInteger(SetCC->getOperand(0).getValueType())) {
1586     switch (SetCC->getCondition()) {
1587     default: assert(0 && "Illegal integer SetCC!");
1588     case ISD::SETEQ: Opc = X86::JE; break;
1589     case ISD::SETGT: Opc = X86::JG; break;
1590     case ISD::SETGE: Opc = X86::JGE; break;
1591     case ISD::SETLT: Opc = X86::JL; break;
1592     case ISD::SETLE: Opc = X86::JLE; break;
1593     case ISD::SETNE: Opc = X86::JNE; break;
1594     case ISD::SETULT: Opc = X86::JB; break;
1595     case ISD::SETUGT: Opc = X86::JA; break;
1596     case ISD::SETULE: Opc = X86::JBE; break;
1597     case ISD::SETUGE: Opc = X86::JAE; break;
1598     }
1599     Select(Chain);
1600     EmitCMP(SetCC->getOperand(0), SetCC->getOperand(1), SetCC->hasOneUse());
1601     BuildMI(BB, Opc, 1).addMBB(Dest);
1602     return false;
1603   }
1604
1605   unsigned Opc2 = 0;  // Second branch if needed.
1606
1607   // On a floating point condition, the flags are set as follows:
1608   // ZF  PF  CF   op
1609   //  0 | 0 | 0 | X > Y
1610   //  0 | 0 | 1 | X < Y
1611   //  1 | 0 | 0 | X == Y
1612   //  1 | 1 | 1 | unordered
1613   //
1614   switch (SetCC->getCondition()) {
1615   default: assert(0 && "Invalid FP setcc!");
1616   case ISD::SETUEQ:
1617   case ISD::SETEQ:   Opc = X86::JE;  break;     // True if ZF = 1
1618   case ISD::SETOGT:
1619   case ISD::SETGT:   Opc = X86::JA;  break;     // True if CF = 0 and ZF = 0
1620   case ISD::SETOGE:
1621   case ISD::SETGE:   Opc = X86::JAE; break;     // True if CF = 0
1622   case ISD::SETULT:
1623   case ISD::SETLT:   Opc = X86::JB;  break;     // True if CF = 1
1624   case ISD::SETULE:
1625   case ISD::SETLE:   Opc = X86::JBE; break;     // True if CF = 1 or ZF = 1
1626   case ISD::SETONE:
1627   case ISD::SETNE:   Opc = X86::JNE; break;     // True if ZF = 0
1628   case ISD::SETUO:   Opc = X86::JP;  break;     // True if PF = 1
1629   case ISD::SETO:    Opc = X86::JNP; break;     // True if PF = 0
1630   case ISD::SETUGT:      // PF = 1 | (ZF = 0 & CF = 0)
1631     Opc = X86::JA;       // ZF = 0 & CF = 0
1632     Opc2 = X86::JP;      // PF = 1
1633     break;
1634   case ISD::SETUGE:      // PF = 1 | CF = 0
1635     Opc = X86::JAE;      // CF = 0
1636     Opc2 = X86::JP;      // PF = 1
1637     break;
1638   case ISD::SETUNE:      // PF = 1 | ZF = 0
1639     Opc = X86::JNE;      // ZF = 0
1640     Opc2 = X86::JP;      // PF = 1
1641     break;
1642   case ISD::SETOEQ:      // PF = 0 & ZF = 1
1643     //X86::JNP, X86::JE
1644     //X86::AND8rr
1645     return true;    // FIXME: Emit more efficient code for this branch.
1646   case ISD::SETOLT:      // PF = 0 & CF = 1
1647     //X86::JNP, X86::JB
1648     //X86::AND8rr
1649     return true;    // FIXME: Emit more efficient code for this branch.
1650   case ISD::SETOLE:      // PF = 0 & (CF = 1 || ZF = 1)
1651     //X86::JNP, X86::JBE
1652     //X86::AND8rr
1653     return true;    // FIXME: Emit more efficient code for this branch.
1654   }
1655
1656   Select(Chain);
1657   EmitCMP(SetCC->getOperand(0), SetCC->getOperand(1), SetCC->hasOneUse());
1658   BuildMI(BB, Opc, 1).addMBB(Dest);
1659   if (Opc2)
1660     BuildMI(BB, Opc2, 1).addMBB(Dest);
1661   return false;
1662 }
1663
1664 /// EmitSelectCC - Emit code into BB that performs a select operation between
1665 /// the two registers RTrue and RFalse, generating a result into RDest.  Return
1666 /// true if the fold cannot be performed.
1667 ///
1668 void ISel::EmitSelectCC(SDOperand Cond, MVT::ValueType SVT,
1669                         unsigned RTrue, unsigned RFalse, unsigned RDest) {
1670   enum Condition {
1671     EQ, NE, LT, LE, GT, GE, B, BE, A, AE, P, NP,
1672     NOT_SET
1673   } CondCode = NOT_SET;
1674
1675   static const unsigned CMOVTAB16[] = {
1676     X86::CMOVE16rr,  X86::CMOVNE16rr, X86::CMOVL16rr,  X86::CMOVLE16rr,
1677     X86::CMOVG16rr,  X86::CMOVGE16rr, X86::CMOVB16rr,  X86::CMOVBE16rr,
1678     X86::CMOVA16rr,  X86::CMOVAE16rr, X86::CMOVP16rr,  X86::CMOVNP16rr,
1679   };
1680   static const unsigned CMOVTAB32[] = {
1681     X86::CMOVE32rr,  X86::CMOVNE32rr, X86::CMOVL32rr,  X86::CMOVLE32rr,
1682     X86::CMOVG32rr,  X86::CMOVGE32rr, X86::CMOVB32rr,  X86::CMOVBE32rr,
1683     X86::CMOVA32rr,  X86::CMOVAE32rr, X86::CMOVP32rr,  X86::CMOVNP32rr,
1684   };
1685   static const unsigned CMOVTABFP[] = {
1686     X86::FCMOVE ,  X86::FCMOVNE, /*missing*/0, /*missing*/0,
1687     /*missing*/0,  /*missing*/0, X86::FCMOVB , X86::FCMOVBE,
1688     X86::FCMOVA ,  X86::FCMOVAE, X86::FCMOVP , X86::FCMOVNP
1689   };
1690   static const unsigned SSE_CMOVTAB[] = {
1691     0 /* CMPEQSS */, 4 /* CMPNEQSS */, 1 /* CMPLTSS */, 2 /* CMPLESS */,
1692     2 /* CMPLESS */, 1 /* CMPLTSS */, /*missing*/0, /*missing*/0,
1693     /*missing*/0,  /*missing*/0, /*missing*/0, /*missing*/0
1694   };
1695
1696   if (SetCCSDNode *SetCC = dyn_cast<SetCCSDNode>(Cond)) {
1697     if (MVT::isInteger(SetCC->getOperand(0).getValueType())) {
1698       switch (SetCC->getCondition()) {
1699       default: assert(0 && "Unknown integer comparison!");
1700       case ISD::SETEQ:  CondCode = EQ; break;
1701       case ISD::SETGT:  CondCode = GT; break;
1702       case ISD::SETGE:  CondCode = GE; break;
1703       case ISD::SETLT:  CondCode = LT; break;
1704       case ISD::SETLE:  CondCode = LE; break;
1705       case ISD::SETNE:  CondCode = NE; break;
1706       case ISD::SETULT: CondCode = B; break;
1707       case ISD::SETUGT: CondCode = A; break;
1708       case ISD::SETULE: CondCode = BE; break;
1709       case ISD::SETUGE: CondCode = AE; break;
1710       }
1711     } else if (X86ScalarSSE) {
1712       switch (SetCC->getCondition()) {
1713       default: assert(0 && "Unknown scalar fp comparison!");
1714       case ISD::SETEQ:  CondCode = EQ; break;
1715       case ISD::SETNE:  CondCode = NE; break;
1716       case ISD::SETULT:
1717       case ISD::SETLT:  CondCode = LT; break;
1718       case ISD::SETULE:
1719       case ISD::SETLE:  CondCode = LE; break;
1720       case ISD::SETUGT:
1721       case ISD::SETGT:  CondCode = GT; break;
1722       case ISD::SETUGE:
1723       case ISD::SETGE:  CondCode = GE; break;
1724       }
1725     } else {
1726       // On a floating point condition, the flags are set as follows:
1727       // ZF  PF  CF   op
1728       //  0 | 0 | 0 | X > Y
1729       //  0 | 0 | 1 | X < Y
1730       //  1 | 0 | 0 | X == Y
1731       //  1 | 1 | 1 | unordered
1732       //
1733       switch (SetCC->getCondition()) {
1734       default: assert(0 && "Unknown FP comparison!");
1735       case ISD::SETUEQ:
1736       case ISD::SETEQ:  CondCode = EQ; break;     // True if ZF = 1
1737       case ISD::SETOGT:
1738       case ISD::SETGT:  CondCode = A;  break;     // True if CF = 0 and ZF = 0
1739       case ISD::SETOGE:
1740       case ISD::SETGE:  CondCode = AE; break;     // True if CF = 0
1741       case ISD::SETULT:
1742       case ISD::SETLT:  CondCode = B;  break;     // True if CF = 1
1743       case ISD::SETULE:
1744       case ISD::SETLE:  CondCode = BE; break;     // True if CF = 1 or ZF = 1
1745       case ISD::SETONE:
1746       case ISD::SETNE:  CondCode = NE; break;     // True if ZF = 0
1747       case ISD::SETUO:  CondCode = P;  break;     // True if PF = 1
1748       case ISD::SETO:   CondCode = NP; break;     // True if PF = 0
1749       case ISD::SETUGT:      // PF = 1 | (ZF = 0 & CF = 0)
1750       case ISD::SETUGE:      // PF = 1 | CF = 0
1751       case ISD::SETUNE:      // PF = 1 | ZF = 0
1752       case ISD::SETOEQ:      // PF = 0 & ZF = 1
1753       case ISD::SETOLT:      // PF = 0 & CF = 1
1754       case ISD::SETOLE:      // PF = 0 & (CF = 1 || ZF = 1)
1755         // We cannot emit this comparison as a single cmov.
1756         break;
1757       }
1758     }
1759   }
1760
1761   // There's no SSE equivalent of FCMOVE.  In some cases we can fake it up, in
1762   // Others we will have to do the PowerPC thing and generate an MBB for the
1763   // true and false values and select between them with a PHI.
1764   if (X86ScalarSSE) {
1765     if (CondCode != NOT_SET) {
1766       unsigned CMPSOpc = (SVT == MVT::f64) ? X86::CMPSDrr : X86::CMPSSrr;
1767       unsigned CMPSImm = SSE_CMOVTAB[CondCode];
1768       // FIXME check for min
1769       // FIXME check for max
1770       // FIXME check for reverse
1771       unsigned LHS = SelectExpr(Cond.getOperand(0));
1772       unsigned RHS = SelectExpr(Cond.getOperand(1));
1773       // emit compare mask
1774       unsigned MaskReg = MakeReg(SVT);
1775       BuildMI(BB, CMPSOpc, 3, MaskReg).addReg(LHS).addReg(RHS).addImm(CMPSImm);
1776       // emit and with mask
1777       unsigned TrueMask = MakeReg(SVT);
1778       unsigned AndOpc = (SVT == MVT::f32) ? X86::ANDPSrr : X86::ANDPDrr;
1779       BuildMI(BB, AndOpc, 2, TrueMask).addReg(RTrue).addReg(MaskReg);
1780       // emit and with inverse mask
1781       unsigned FalseMask = MakeReg(SVT);
1782       unsigned AndnOpc = (SVT == MVT::f32) ? X86::ANDNPSrr : X86::ANDNPDrr;
1783       BuildMI(BB, AndnOpc, 2, FalseMask).addReg(RFalse).addReg(MaskReg);
1784       // emit or into dest reg
1785       unsigned OROpc = (SVT == MVT::f32) ? X86::ORPSrr : X86::ORPDrr;
1786       BuildMI(BB, OROpc, 2, RDest).addReg(TrueMask).addReg(FalseMask);
1787       return;
1788     } else {
1789       // do the test and branch thing
1790       // Get the condition into the zero flag.
1791       unsigned CondReg = SelectExpr(Cond);
1792       BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
1793
1794       // Create an iterator with which to insert the MBB for copying the false
1795       // value and the MBB to hold the PHI instruction for this SetCC.
1796       MachineBasicBlock *thisMBB = BB;
1797       const BasicBlock *LLVM_BB = BB->getBasicBlock();
1798       ilist<MachineBasicBlock>::iterator It = BB;
1799       ++It;
1800
1801       //  thisMBB:
1802       //  ...
1803       //   TrueVal = ...
1804       //   cmpTY ccX, r1, r2
1805       //   bCC sinkMBB
1806       //   fallthrough --> copy0MBB
1807       MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1808       MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
1809       BuildMI(BB, X86::JNE, 1).addMBB(sinkMBB);
1810       MachineFunction *F = BB->getParent();
1811       F->getBasicBlockList().insert(It, copy0MBB);
1812       F->getBasicBlockList().insert(It, sinkMBB);
1813       // Update machine-CFG edges
1814       BB->addSuccessor(copy0MBB);
1815       BB->addSuccessor(sinkMBB);
1816
1817       //  copy0MBB:
1818       //   %FalseValue = ...
1819       //   # fallthrough to sinkMBB
1820       BB = copy0MBB;
1821       // Update machine-CFG edges
1822       BB->addSuccessor(sinkMBB);
1823
1824       //  sinkMBB:
1825       //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
1826       //  ...
1827       BB = sinkMBB;
1828       BuildMI(BB, X86::PHI, 4, RDest).addReg(RFalse)
1829         .addMBB(copy0MBB).addReg(RTrue).addMBB(thisMBB);
1830     }
1831     return;
1832   }
1833
1834   unsigned Opc = 0;
1835   if (CondCode != NOT_SET) {
1836     switch (SVT) {
1837     default: assert(0 && "Cannot select this type!");
1838     case MVT::i16: Opc = CMOVTAB16[CondCode]; break;
1839     case MVT::i32: Opc = CMOVTAB32[CondCode]; break;
1840     case MVT::f64: Opc = CMOVTABFP[CondCode]; break;
1841     }
1842   }
1843
1844   // Finally, if we weren't able to fold this, just emit the condition and test
1845   // it.
1846   if (CondCode == NOT_SET || Opc == 0) {
1847     // Get the condition into the zero flag.
1848     unsigned CondReg = SelectExpr(Cond);
1849     BuildMI(BB, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
1850
1851     switch (SVT) {
1852     default: assert(0 && "Cannot select this type!");
1853     case MVT::i16: Opc = X86::CMOVE16rr; break;
1854     case MVT::i32: Opc = X86::CMOVE32rr; break;
1855     case MVT::f64: Opc = X86::FCMOVE; break;
1856     }
1857   } else {
1858     // FIXME: CMP R, 0 -> TEST R, R
1859     EmitCMP(Cond.getOperand(0), Cond.getOperand(1), Cond.Val->hasOneUse());
1860     std::swap(RTrue, RFalse);
1861   }
1862   BuildMI(BB, Opc, 2, RDest).addReg(RTrue).addReg(RFalse);
1863 }
1864
1865 void ISel::EmitCMP(SDOperand LHS, SDOperand RHS, bool HasOneUse) {
1866   unsigned Opc;
1867   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
1868     Opc = 0;
1869     if (HasOneUse && isFoldableLoad(LHS, RHS)) {
1870       switch (RHS.getValueType()) {
1871       default: break;
1872       case MVT::i1:
1873       case MVT::i8:  Opc = X86::CMP8mi;  break;
1874       case MVT::i16: Opc = X86::CMP16mi; break;
1875       case MVT::i32: Opc = X86::CMP32mi; break;
1876       }
1877       if (Opc) {
1878         X86AddressMode AM;
1879         EmitFoldedLoad(LHS, AM);
1880         addFullAddress(BuildMI(BB, Opc, 5), AM).addImm(CN->getValue());
1881         return;
1882       }
1883     }
1884
1885     switch (RHS.getValueType()) {
1886     default: break;
1887     case MVT::i1:
1888     case MVT::i8:  Opc = X86::CMP8ri;  break;
1889     case MVT::i16: Opc = X86::CMP16ri; break;
1890     case MVT::i32: Opc = X86::CMP32ri; break;
1891     }
1892     if (Opc) {
1893       unsigned Tmp1 = SelectExpr(LHS);
1894       BuildMI(BB, Opc, 2).addReg(Tmp1).addImm(CN->getValue());
1895       return;
1896     }
1897   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(RHS)) {
1898     if (!X86ScalarSSE && (CN->isExactlyValue(+0.0) ||
1899                           CN->isExactlyValue(-0.0))) {
1900       unsigned Reg = SelectExpr(LHS);
1901       BuildMI(BB, X86::FTST, 1).addReg(Reg);
1902       BuildMI(BB, X86::FNSTSW8r, 0);
1903       BuildMI(BB, X86::SAHF, 1);
1904       return;
1905     }
1906   }
1907
1908   Opc = 0;
1909   if (HasOneUse && isFoldableLoad(LHS, RHS)) {
1910     switch (RHS.getValueType()) {
1911     default: break;
1912     case MVT::i1:
1913     case MVT::i8:  Opc = X86::CMP8mr;  break;
1914     case MVT::i16: Opc = X86::CMP16mr; break;
1915     case MVT::i32: Opc = X86::CMP32mr; break;
1916     }
1917     if (Opc) {
1918       X86AddressMode AM;
1919       EmitFoldedLoad(LHS, AM);
1920       unsigned Reg = SelectExpr(RHS);
1921       addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(Reg);
1922       return;
1923     }
1924   }
1925
1926   switch (LHS.getValueType()) {
1927   default: assert(0 && "Cannot compare this value!");
1928   case MVT::i1:
1929   case MVT::i8:  Opc = X86::CMP8rr;  break;
1930   case MVT::i16: Opc = X86::CMP16rr; break;
1931   case MVT::i32: Opc = X86::CMP32rr; break;
1932   case MVT::f32: Opc = X86::UCOMISSrr; break;
1933   case MVT::f64: Opc = X86ScalarSSE ? X86::UCOMISDrr : X86::FUCOMIr; break;
1934   }
1935   unsigned Tmp1, Tmp2;
1936   if (getRegPressure(LHS) > getRegPressure(RHS)) {
1937     Tmp1 = SelectExpr(LHS);
1938     Tmp2 = SelectExpr(RHS);
1939   } else {
1940     Tmp2 = SelectExpr(RHS);
1941     Tmp1 = SelectExpr(LHS);
1942   }
1943   BuildMI(BB, Opc, 2).addReg(Tmp1).addReg(Tmp2);
1944 }
1945
1946 /// isFoldableLoad - Return true if this is a load instruction that can safely
1947 /// be folded into an operation that uses it.
1948 bool ISel::isFoldableLoad(SDOperand Op, SDOperand OtherOp, bool FloatPromoteOk){
1949   if (Op.getOpcode() == ISD::LOAD) {
1950     // FIXME: currently can't fold constant pool indexes.
1951     if (isa<ConstantPoolSDNode>(Op.getOperand(1)))
1952       return false;
1953   } else if (FloatPromoteOk && Op.getOpcode() == ISD::EXTLOAD &&
1954              cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::f32) {
1955     // FIXME: currently can't fold constant pool indexes.
1956     if (isa<ConstantPoolSDNode>(Op.getOperand(1)))
1957       return false;
1958   } else {
1959     return false;
1960   }
1961
1962   // If this load has already been emitted, we clearly can't fold it.
1963   assert(Op.ResNo == 0 && "Not a use of the value of the load?");
1964   if (ExprMap.count(Op.getValue(1))) return false;
1965   assert(!ExprMap.count(Op.getValue(0)) && "Value in map but not token chain?");
1966   assert(!ExprMap.count(Op.getValue(1))&&"Token lowered but value not in map?");
1967
1968   // If there is not just one use of its value, we cannot fold.
1969   if (!Op.Val->hasNUsesOfValue(1, 0)) return false;
1970
1971   // Finally, we cannot fold the load into the operation if this would induce a
1972   // cycle into the resultant dag.  To check for this, see if OtherOp (the other
1973   // operand of the operation we are folding the load into) can possible use the
1974   // chain node defined by the load.
1975   if (OtherOp.Val && !Op.Val->hasNUsesOfValue(0, 1)) { // Has uses of chain?
1976     std::set<SDNode*> Visited;
1977     if (NodeTransitivelyUsesValue(OtherOp, Op.getValue(1), Visited))
1978       return false;
1979   }
1980   return true;
1981 }
1982
1983
1984 /// EmitFoldedLoad - Ensure that the arguments of the load are code generated,
1985 /// and compute the address being loaded into AM.
1986 void ISel::EmitFoldedLoad(SDOperand Op, X86AddressMode &AM) {
1987   SDOperand Chain   = Op.getOperand(0);
1988   SDOperand Address = Op.getOperand(1);
1989
1990   if (getRegPressure(Chain) > getRegPressure(Address)) {
1991     Select(Chain);
1992     SelectAddress(Address, AM);
1993   } else {
1994     SelectAddress(Address, AM);
1995     Select(Chain);
1996   }
1997
1998   // The chain for this load is now lowered.
1999   assert(ExprMap.count(SDOperand(Op.Val, 1)) == 0 &&
2000          "Load emitted more than once?");
2001   if (!ExprMap.insert(std::make_pair(Op.getValue(1), 1)).second)
2002     assert(0 && "Load emitted more than once!");
2003 }
2004
2005 // EmitOrOpOp - Pattern match the expression (Op1|Op2), where we know that op1
2006 // and op2 are i8/i16/i32 values with one use each (the or).  If we can form a
2007 // SHLD or SHRD, emit the instruction (generating the value into DestReg) and
2008 // return true.
2009 bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
2010   if (Op1.getOpcode() == ISD::SHL && Op2.getOpcode() == ISD::SRL) {
2011     // good!
2012   } else if (Op2.getOpcode() == ISD::SHL && Op1.getOpcode() == ISD::SRL) {
2013     std::swap(Op1, Op2);  // Op1 is the SHL now.
2014   } else {
2015     return false;  // No match
2016   }
2017
2018   SDOperand ShlVal = Op1.getOperand(0);
2019   SDOperand ShlAmt = Op1.getOperand(1);
2020   SDOperand ShrVal = Op2.getOperand(0);
2021   SDOperand ShrAmt = Op2.getOperand(1);
2022
2023   unsigned RegSize = MVT::getSizeInBits(Op1.getValueType());
2024
2025   // Find out if ShrAmt = 32-ShlAmt  or  ShlAmt = 32-ShrAmt.
2026   if (ShlAmt.getOpcode() == ISD::SUB && ShlAmt.getOperand(1) == ShrAmt)
2027     if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShlAmt.getOperand(0)))
2028       if (SubCST->getValue() == RegSize) {
2029         // (A >> ShrAmt) | (A << (32-ShrAmt)) ==> ROR A, ShrAmt
2030         // (A >> ShrAmt) | (B << (32-ShrAmt)) ==> SHRD A, B, ShrAmt
2031         if (ShrVal == ShlVal) {
2032           unsigned Reg, ShAmt;
2033           if (getRegPressure(ShrVal) > getRegPressure(ShrAmt)) {
2034             Reg = SelectExpr(ShrVal);
2035             ShAmt = SelectExpr(ShrAmt);
2036           } else {
2037             ShAmt = SelectExpr(ShrAmt);
2038             Reg = SelectExpr(ShrVal);
2039           }
2040           BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
2041           unsigned Opc = RegSize == 8 ? X86::ROR8rCL :
2042                         (RegSize == 16 ? X86::ROR16rCL : X86::ROR32rCL);
2043           BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
2044           return true;
2045         } else if (RegSize != 8) {
2046           unsigned AReg, BReg;
2047           if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
2048             BReg = SelectExpr(ShlVal);
2049             AReg = SelectExpr(ShrVal);
2050           } else {
2051             AReg = SelectExpr(ShrVal);
2052             BReg = SelectExpr(ShlVal);
2053           }
2054           unsigned ShAmt = SelectExpr(ShrAmt);
2055           BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
2056           unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
2057           BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
2058           return true;
2059         }
2060       }
2061
2062   if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
2063     if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
2064       if (SubCST->getValue() == RegSize) {
2065         // (A << ShlAmt) | (A >> (32-ShlAmt)) ==> ROL A, ShrAmt
2066         // (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
2067         if (ShrVal == ShlVal) {
2068           unsigned Reg, ShAmt;
2069           if (getRegPressure(ShrVal) > getRegPressure(ShlAmt)) {
2070             Reg = SelectExpr(ShrVal);
2071             ShAmt = SelectExpr(ShlAmt);
2072           } else {
2073             ShAmt = SelectExpr(ShlAmt);
2074             Reg = SelectExpr(ShrVal);
2075           }
2076           BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
2077           unsigned Opc = RegSize == 8 ? X86::ROL8rCL :
2078                         (RegSize == 16 ? X86::ROL16rCL : X86::ROL32rCL);
2079           BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
2080           return true;
2081         } else if (RegSize != 8) {
2082           unsigned AReg, BReg;
2083           if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
2084             AReg = SelectExpr(ShlVal);
2085             BReg = SelectExpr(ShrVal);
2086           } else {
2087             BReg = SelectExpr(ShrVal);
2088             AReg = SelectExpr(ShlVal);
2089           }
2090           unsigned ShAmt = SelectExpr(ShlAmt);
2091           BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
2092           unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
2093           BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
2094           return true;
2095         }
2096       }
2097
2098   if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
2099     if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
2100       if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize)
2101         if (ShrCst->getValue() == RegSize-ShlCst->getValue()) {
2102           // (A >> 5) | (A << 27) --> ROR A, 5
2103           // (A >> 5) | (B << 27) --> SHRD A, B, 5
2104           if (ShrVal == ShlVal) {
2105             unsigned Reg = SelectExpr(ShrVal);
2106             unsigned Opc = RegSize == 8 ? X86::ROR8ri :
2107               (RegSize == 16 ? X86::ROR16ri : X86::ROR32ri);
2108             BuildMI(BB, Opc, 2, DestReg).addReg(Reg).addImm(ShrCst->getValue());
2109             return true;
2110           } else if (RegSize != 8) {
2111             unsigned AReg, BReg;
2112             if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
2113               BReg = SelectExpr(ShlVal);
2114               AReg = SelectExpr(ShrVal);
2115             } else {
2116               AReg = SelectExpr(ShrVal);
2117               BReg = SelectExpr(ShlVal);
2118             }
2119             unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
2120             BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
2121               .addImm(ShrCst->getValue());
2122             return true;
2123           }
2124         }
2125
2126   return false;
2127 }
2128
2129 unsigned ISel::SelectExpr(SDOperand N) {
2130   unsigned Result;
2131   unsigned Tmp1, Tmp2, Tmp3;
2132   unsigned Opc = 0;
2133   SDNode *Node = N.Val;
2134   SDOperand Op0, Op1;
2135
2136   if (Node->getOpcode() == ISD::CopyFromReg) {
2137     if (MRegisterInfo::isVirtualRegister(cast<RegSDNode>(Node)->getReg()) ||
2138         cast<RegSDNode>(Node)->getReg() == X86::ESP) {
2139       // Just use the specified register as our input.
2140       return cast<RegSDNode>(Node)->getReg();
2141     }
2142   }
2143
2144   unsigned &Reg = ExprMap[N];
2145   if (Reg) return Reg;
2146
2147   switch (N.getOpcode()) {
2148   default:
2149     Reg = Result = (N.getValueType() != MVT::Other) ?
2150                             MakeReg(N.getValueType()) : 1;
2151     break;
2152   case X86ISD::TAILCALL:
2153   case X86ISD::CALL:
2154     // If this is a call instruction, make sure to prepare ALL of the result
2155     // values as well as the chain.
2156     ExprMap[N.getValue(0)] = 1;
2157     if (Node->getNumValues() > 1) {
2158       Result = MakeReg(Node->getValueType(1));
2159       ExprMap[N.getValue(1)] = Result;
2160       for (unsigned i = 2, e = Node->getNumValues(); i != e; ++i)
2161         ExprMap[N.getValue(i)] = MakeReg(Node->getValueType(i));
2162     } else {
2163       Result = 1;
2164     }
2165     break;
2166   case ISD::ADD_PARTS:
2167   case ISD::SUB_PARTS:
2168   case ISD::SHL_PARTS:
2169   case ISD::SRL_PARTS:
2170   case ISD::SRA_PARTS:
2171     Result = MakeReg(Node->getValueType(0));
2172     ExprMap[N.getValue(0)] = Result;
2173     for (unsigned i = 1, e = N.Val->getNumValues(); i != e; ++i)
2174       ExprMap[N.getValue(i)] = MakeReg(Node->getValueType(i));
2175     break;
2176   }
2177
2178   switch (N.getOpcode()) {
2179   default:
2180     Node->dump();
2181     assert(0 && "Node not handled!\n");
2182   case ISD::FP_EXTEND:
2183     assert(X86ScalarSSE && "Scalar SSE FP must be enabled to use f32");
2184     Tmp1 = SelectExpr(N.getOperand(0));
2185     BuildMI(BB, X86::CVTSS2SDrr, 1, Result).addReg(Tmp1);
2186     return Result;
2187   case ISD::CopyFromReg:
2188     Select(N.getOperand(0));
2189     if (Result == 1) {
2190       Reg = Result = ExprMap[N.getValue(0)] =
2191         MakeReg(N.getValue(0).getValueType());
2192     }
2193     switch (Node->getValueType(0)) {
2194     default: assert(0 && "Cannot CopyFromReg this!");
2195     case MVT::i1:
2196     case MVT::i8:
2197       BuildMI(BB, X86::MOV8rr, 1,
2198               Result).addReg(cast<RegSDNode>(Node)->getReg());
2199       return Result;
2200     case MVT::i16:
2201       BuildMI(BB, X86::MOV16rr, 1,
2202               Result).addReg(cast<RegSDNode>(Node)->getReg());
2203       return Result;
2204     case MVT::i32:
2205       BuildMI(BB, X86::MOV32rr, 1,
2206               Result).addReg(cast<RegSDNode>(Node)->getReg());
2207       return Result;
2208     }
2209
2210   case ISD::FrameIndex:
2211     Tmp1 = cast<FrameIndexSDNode>(N)->getIndex();
2212     addFrameReference(BuildMI(BB, X86::LEA32r, 4, Result), (int)Tmp1);
2213     return Result;
2214   case ISD::ConstantPool:
2215     Tmp1 = cast<ConstantPoolSDNode>(N)->getIndex();
2216     addConstantPoolReference(BuildMI(BB, X86::LEA32r, 4, Result), Tmp1);
2217     return Result;
2218   case ISD::ConstantFP:
2219     ContainsFPCode = true;
2220     Tmp1 = Result;   // Intermediate Register
2221     if (cast<ConstantFPSDNode>(N)->getValue() < 0.0 ||
2222         cast<ConstantFPSDNode>(N)->isExactlyValue(-0.0))
2223       Tmp1 = MakeReg(MVT::f64);
2224
2225     if (cast<ConstantFPSDNode>(N)->isExactlyValue(+0.0) ||
2226         cast<ConstantFPSDNode>(N)->isExactlyValue(-0.0))
2227       BuildMI(BB, X86::FLD0, 0, Tmp1);
2228     else if (cast<ConstantFPSDNode>(N)->isExactlyValue(+1.0) ||
2229              cast<ConstantFPSDNode>(N)->isExactlyValue(-1.0))
2230       BuildMI(BB, X86::FLD1, 0, Tmp1);
2231     else
2232       assert(0 && "Unexpected constant!");
2233     if (Tmp1 != Result)
2234       BuildMI(BB, X86::FCHS, 1, Result).addReg(Tmp1);
2235     return Result;
2236   case ISD::Constant:
2237     switch (N.getValueType()) {
2238     default: assert(0 && "Cannot use constants of this type!");
2239     case MVT::i1:
2240     case MVT::i8:  Opc = X86::MOV8ri;  break;
2241     case MVT::i16: Opc = X86::MOV16ri; break;
2242     case MVT::i32: Opc = X86::MOV32ri; break;
2243     }
2244     BuildMI(BB, Opc, 1,Result).addImm(cast<ConstantSDNode>(N)->getValue());
2245     return Result;
2246   case ISD::UNDEF:
2247     if (Node->getValueType(0) == MVT::f64) {
2248       // FIXME: SHOULD TEACH STACKIFIER ABOUT UNDEF VALUES!
2249       BuildMI(BB, X86::FLD0, 0, Result);
2250     } else {
2251       BuildMI(BB, X86::IMPLICIT_DEF, 0, Result);
2252     }
2253     return Result;
2254   case ISD::GlobalAddress: {
2255     GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
2256     // For Darwin, external and weak symbols are indirect, so we want to load
2257     // the value at address GV, not the value of GV itself.
2258     if (Subtarget->getIndirectExternAndWeakGlobals() &&
2259         (GV->hasWeakLinkage() || GV->isExternal())) {
2260       BuildMI(BB, X86::MOV32rm, 4, Result).addReg(0).addZImm(1).addReg(0)
2261         .addGlobalAddress(GV, false, 0);
2262     } else {
2263       BuildMI(BB, X86::MOV32ri, 1, Result).addGlobalAddress(GV);
2264     }
2265     return Result;
2266   }
2267   case ISD::ExternalSymbol: {
2268     const char *Sym = cast<ExternalSymbolSDNode>(N)->getSymbol();
2269     BuildMI(BB, X86::MOV32ri, 1, Result).addExternalSymbol(Sym);
2270     return Result;
2271   }
2272   case ISD::ZERO_EXTEND: {
2273     int DestIs16 = N.getValueType() == MVT::i16;
2274     int SrcIs16  = N.getOperand(0).getValueType() == MVT::i16;
2275
2276     // FIXME: This hack is here for zero extension casts from bool to i8.  This
2277     // would not be needed if bools were promoted by Legalize.
2278     if (N.getValueType() == MVT::i8) {
2279       Tmp1 = SelectExpr(N.getOperand(0));
2280       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(Tmp1);
2281       return Result;
2282     }
2283
2284     if (isFoldableLoad(N.getOperand(0), SDOperand())) {
2285       static const unsigned Opc[3] = {
2286         X86::MOVZX32rm8, X86::MOVZX32rm16, X86::MOVZX16rm8
2287       };
2288
2289       X86AddressMode AM;
2290       EmitFoldedLoad(N.getOperand(0), AM);
2291       addFullAddress(BuildMI(BB, Opc[SrcIs16+DestIs16*2], 4, Result), AM);
2292
2293       return Result;
2294     }
2295
2296     static const unsigned Opc[3] = {
2297       X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOVZX16rr8
2298     };
2299     Tmp1 = SelectExpr(N.getOperand(0));
2300     BuildMI(BB, Opc[SrcIs16+DestIs16*2], 1, Result).addReg(Tmp1);
2301     return Result;
2302   }
2303   case ISD::SIGN_EXTEND: {
2304     int DestIs16 = N.getValueType() == MVT::i16;
2305     int SrcIs16  = N.getOperand(0).getValueType() == MVT::i16;
2306
2307     // FIXME: Legalize should promote bools to i8!
2308     assert(N.getOperand(0).getValueType() != MVT::i1 &&
2309            "Sign extend from bool not implemented!");
2310
2311     if (isFoldableLoad(N.getOperand(0), SDOperand())) {
2312       static const unsigned Opc[3] = {
2313         X86::MOVSX32rm8, X86::MOVSX32rm16, X86::MOVSX16rm8
2314       };
2315
2316       X86AddressMode AM;
2317       EmitFoldedLoad(N.getOperand(0), AM);
2318       addFullAddress(BuildMI(BB, Opc[SrcIs16+DestIs16*2], 4, Result), AM);
2319       return Result;
2320     }
2321
2322     static const unsigned Opc[3] = {
2323       X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOVSX16rr8
2324     };
2325     Tmp1 = SelectExpr(N.getOperand(0));
2326     BuildMI(BB, Opc[SrcIs16+DestIs16*2], 1, Result).addReg(Tmp1);
2327     return Result;
2328   }
2329   case ISD::TRUNCATE:
2330     // Fold TRUNCATE (LOAD P) into a smaller load from P.
2331     // FIXME: This should be performed by the DAGCombiner.
2332     if (isFoldableLoad(N.getOperand(0), SDOperand())) {
2333       switch (N.getValueType()) {
2334       default: assert(0 && "Unknown truncate!");
2335       case MVT::i1:
2336       case MVT::i8:  Opc = X86::MOV8rm;  break;
2337       case MVT::i16: Opc = X86::MOV16rm; break;
2338       }
2339       X86AddressMode AM;
2340       EmitFoldedLoad(N.getOperand(0), AM);
2341       addFullAddress(BuildMI(BB, Opc, 4, Result), AM);
2342       return Result;
2343     }
2344
2345     // Handle cast of LARGER int to SMALLER int using a move to EAX followed by
2346     // a move out of AX or AL.
2347     switch (N.getOperand(0).getValueType()) {
2348     default: assert(0 && "Unknown truncate!");
2349     case MVT::i8:  Tmp2 = X86::AL;  Opc = X86::MOV8rr;  break;
2350     case MVT::i16: Tmp2 = X86::AX;  Opc = X86::MOV16rr; break;
2351     case MVT::i32: Tmp2 = X86::EAX; Opc = X86::MOV32rr; break;
2352     }
2353     Tmp1 = SelectExpr(N.getOperand(0));
2354     BuildMI(BB, Opc, 1, Tmp2).addReg(Tmp1);
2355
2356     switch (N.getValueType()) {
2357     default: assert(0 && "Unknown truncate!");
2358     case MVT::i1:
2359     case MVT::i8:  Tmp2 = X86::AL;  Opc = X86::MOV8rr;  break;
2360     case MVT::i16: Tmp2 = X86::AX;  Opc = X86::MOV16rr; break;
2361     }
2362     BuildMI(BB, Opc, 1, Result).addReg(Tmp2);
2363     return Result;
2364
2365   case ISD::SINT_TO_FP:
2366   case ISD::UINT_TO_FP: {
2367     Tmp1 = SelectExpr(N.getOperand(0));  // Get the operand register
2368     unsigned PromoteOpcode = 0;
2369
2370     // We can handle any sint to fp, and 8 and 16 uint to fp with the direct
2371     // sse conversion instructions.
2372     if (X86ScalarSSE) {
2373       MVT::ValueType SrcTy = N.getOperand(0).getValueType();
2374       MVT::ValueType DstTy = N.getValueType();
2375       switch (SrcTy) {
2376       case MVT::i1:
2377       case MVT::i8:
2378         PromoteOpcode = (N.getOpcode() == ISD::UINT_TO_FP) ?
2379           X86::MOVZX32rr8 : X86::MOVSX32rr8;
2380         break;
2381       case MVT::i16:
2382         PromoteOpcode = (N.getOpcode() == ISD::UINT_TO_FP) ?
2383           X86::MOVZX32rr16 : X86::MOVSX32rr16;
2384         break;
2385       default:
2386         assert(N.getOpcode() != ISD::UINT_TO_FP);
2387         break;
2388       }
2389       if (PromoteOpcode) {
2390         BuildMI(BB, PromoteOpcode, 1, Tmp2).addReg(Tmp1);
2391         Tmp1 = Tmp2;
2392       }
2393       Opc = (DstTy == MVT::f64) ? X86::CVTSI2SDrr : X86::CVTSI2SSrr;
2394       BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
2395       return Result;
2396     }
2397
2398     // FIXME: Most of this grunt work should be done by legalize!
2399     ContainsFPCode = true;
2400
2401     // Promote the integer to a type supported by FLD.  We do this because there
2402     // are no unsigned FLD instructions, so we must promote an unsigned value to
2403     // a larger signed value, then use FLD on the larger value.
2404     //
2405     MVT::ValueType PromoteType = MVT::Other;
2406     MVT::ValueType SrcTy = N.getOperand(0).getValueType();
2407     unsigned RealDestReg = Result;
2408     switch (SrcTy) {
2409     case MVT::i1:
2410     case MVT::i8:
2411       // We don't have the facilities for directly loading byte sized data from
2412       // memory (even signed).  Promote it to 16 bits.
2413       PromoteType = MVT::i16;
2414       PromoteOpcode = Node->getOpcode() == ISD::SINT_TO_FP ?
2415         X86::MOVSX16rr8 : X86::MOVZX16rr8;
2416       break;
2417     case MVT::i16:
2418       if (Node->getOpcode() == ISD::UINT_TO_FP) {
2419         PromoteType = MVT::i32;
2420         PromoteOpcode = X86::MOVZX32rr16;
2421       }
2422       break;
2423     default:
2424       // Don't fild into the real destination.
2425       if (Node->getOpcode() == ISD::UINT_TO_FP)
2426         Result = MakeReg(Node->getValueType(0));
2427       break;
2428     }
2429
2430     if (PromoteType != MVT::Other) {
2431       Tmp2 = MakeReg(PromoteType);
2432       BuildMI(BB, PromoteOpcode, 1, Tmp2).addReg(Tmp1);
2433       SrcTy = PromoteType;
2434       Tmp1 = Tmp2;
2435     }
2436
2437     // Spill the integer to memory and reload it from there.
2438     unsigned Size = MVT::getSizeInBits(SrcTy)/8;
2439     MachineFunction *F = BB->getParent();
2440     int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
2441
2442     switch (SrcTy) {
2443     case MVT::i32:
2444       addFrameReference(BuildMI(BB, X86::MOV32mr, 5),
2445                         FrameIdx).addReg(Tmp1);
2446       addFrameReference(BuildMI(BB, X86::FILD32m, 5, Result), FrameIdx);
2447       break;
2448     case MVT::i16:
2449       addFrameReference(BuildMI(BB, X86::MOV16mr, 5),
2450                         FrameIdx).addReg(Tmp1);
2451       addFrameReference(BuildMI(BB, X86::FILD16m, 5, Result), FrameIdx);
2452       break;
2453     default: break; // No promotion required.
2454     }
2455
2456     if (Node->getOpcode() == ISD::UINT_TO_FP && Result != RealDestReg) {
2457       // If this is a cast from uint -> double, we need to be careful when if
2458       // the "sign" bit is set.  If so, we don't want to make a negative number,
2459       // we want to make a positive number.  Emit code to add an offset if the
2460       // sign bit is set.
2461
2462       // Compute whether the sign bit is set by shifting the reg right 31 bits.
2463       unsigned IsNeg = MakeReg(MVT::i32);
2464       BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(Tmp1).addImm(31);
2465
2466       // Create a CP value that has the offset in one word and 0 in the other.
2467       static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
2468                                                         0x4f80000000000000ULL);
2469       unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
2470       BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(Result)
2471         .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
2472     }
2473     return RealDestReg;
2474   }
2475   case ISD::FP_TO_SINT:
2476   case ISD::FP_TO_UINT: {
2477     // FIXME: Most of this grunt work should be done by legalize!
2478     Tmp1 = SelectExpr(N.getOperand(0));  // Get the operand register
2479
2480     // If the target supports SSE2 and is performing FP operations in SSE regs
2481     // instead of the FP stack, then we can use the efficient CVTSS2SI and
2482     // CVTSD2SI instructions.
2483     if (ISD::FP_TO_SINT == N.getOpcode() && X86ScalarSSE) {
2484       if (MVT::f32 == N.getOperand(0).getValueType()) {
2485         BuildMI(BB, X86::CVTSS2SIrr, 1, Result).addReg(Tmp1);
2486       } else if (MVT::f64 == N.getOperand(0).getValueType()) {
2487         BuildMI(BB, X86::CVTSD2SIrr, 1, Result).addReg(Tmp1);
2488       } else {
2489         assert(0 && "Not an f32 or f64?");
2490         abort();
2491       }
2492       return Result;
2493     }
2494
2495     // Change the floating point control register to use "round towards zero"
2496     // mode when truncating to an integer value.
2497     //
2498     MachineFunction *F = BB->getParent();
2499     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
2500     addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
2501
2502     // Load the old value of the high byte of the control word...
2503     unsigned HighPartOfCW = MakeReg(MVT::i8);
2504     addFrameReference(BuildMI(BB, X86::MOV8rm, 4, HighPartOfCW),
2505                       CWFrameIdx, 1);
2506
2507     // Set the high part to be round to zero...
2508     addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
2509                       CWFrameIdx, 1).addImm(12);
2510
2511     // Reload the modified control word now...
2512     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
2513
2514     // Restore the memory image of control word to original value
2515     addFrameReference(BuildMI(BB, X86::MOV8mr, 5),
2516                       CWFrameIdx, 1).addReg(HighPartOfCW);
2517
2518     // We don't have the facilities for directly storing byte sized data to
2519     // memory.  Promote it to 16 bits.  We also must promote unsigned values to
2520     // larger classes because we only have signed FP stores.
2521     MVT::ValueType StoreClass = Node->getValueType(0);
2522     if (StoreClass == MVT::i8 || Node->getOpcode() == ISD::FP_TO_UINT)
2523       switch (StoreClass) {
2524       case MVT::i1:
2525       case MVT::i8:  StoreClass = MVT::i16; break;
2526       case MVT::i16: StoreClass = MVT::i32; break;
2527       case MVT::i32: StoreClass = MVT::i64; break;
2528       default: assert(0 && "Unknown store class!");
2529       }
2530
2531     // Spill the integer to memory and reload it from there.
2532     unsigned Size = MVT::getSizeInBits(StoreClass)/8;
2533     int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
2534
2535     switch (StoreClass) {
2536     default: assert(0 && "Unknown store class!");
2537     case MVT::i16:
2538       addFrameReference(BuildMI(BB, X86::FIST16m, 5), FrameIdx).addReg(Tmp1);
2539       break;
2540     case MVT::i32:
2541       addFrameReference(BuildMI(BB, X86::FIST32m, 5), FrameIdx).addReg(Tmp1);
2542       break;
2543     case MVT::i64:
2544       addFrameReference(BuildMI(BB, X86::FISTP64m, 5), FrameIdx).addReg(Tmp1);
2545       break;    }
2546
2547     switch (Node->getValueType(0)) {
2548     default:
2549       assert(0 && "Unknown integer type!");
2550     case MVT::i32:
2551       addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Result), FrameIdx);
2552       break;
2553     case MVT::i16:
2554       addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Result), FrameIdx);
2555       break;
2556     case MVT::i8:
2557     case MVT::i1:
2558       addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Result), FrameIdx);
2559       break;
2560     }
2561
2562     // Reload the original control word now.
2563     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
2564     return Result;
2565   }
2566   case ISD::ADD:
2567     Op0 = N.getOperand(0);
2568     Op1 = N.getOperand(1);
2569
2570     if (isFoldableLoad(Op0, Op1, true)) {
2571       std::swap(Op0, Op1);
2572       goto FoldAdd;
2573     }
2574
2575     if (isFoldableLoad(Op1, Op0, true)) {
2576     FoldAdd:
2577       switch (N.getValueType()) {
2578       default: assert(0 && "Cannot add this type!");
2579       case MVT::i1:
2580       case MVT::i8:  Opc = X86::ADD8rm;  break;
2581       case MVT::i16: Opc = X86::ADD16rm; break;
2582       case MVT::i32: Opc = X86::ADD32rm; break;
2583       case MVT::f32: Opc = X86::ADDSSrm; break;
2584       case MVT::f64:
2585         // For F64, handle promoted load operations (from F32) as well!
2586         if (X86ScalarSSE) {
2587           assert(Op1.getOpcode() == ISD::LOAD && "SSE load not promoted");
2588           Opc = X86::ADDSDrm;
2589         } else {
2590           Opc = Op1.getOpcode() == ISD::LOAD ? X86::FADD64m : X86::FADD32m;
2591         }
2592         break;
2593       }
2594       X86AddressMode AM;
2595       EmitFoldedLoad(Op1, AM);
2596       Tmp1 = SelectExpr(Op0);
2597       addFullAddress(BuildMI(BB, Opc, 5, Result).addReg(Tmp1), AM);
2598       return Result;
2599     }
2600
2601     // See if we can codegen this as an LEA to fold operations together.
2602     if (N.getValueType() == MVT::i32) {
2603       ExprMap.erase(N);
2604       X86ISelAddressMode AM;
2605       MatchAddress(N, AM);
2606       ExprMap[N] = Result;
2607
2608       // If this is not just an add, emit the LEA.  For a simple add (like
2609       // reg+reg or reg+imm), we just emit an add.  It might be a good idea to
2610       // leave this as LEA, then peephole it to 'ADD' after two address elim
2611       // happens.
2612       if (AM.Scale != 1 || AM.BaseType == X86ISelAddressMode::FrameIndexBase||
2613           AM.GV || (AM.Base.Reg.Val && AM.IndexReg.Val && AM.Disp)) {
2614         X86AddressMode XAM = SelectAddrExprs(AM);
2615         addFullAddress(BuildMI(BB, X86::LEA32r, 4, Result), XAM);
2616         return Result;
2617       }
2618     }
2619
2620     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2621       Opc = 0;
2622       if (CN->getValue() == 1) {   // add X, 1 -> inc X
2623         switch (N.getValueType()) {
2624         default: assert(0 && "Cannot integer add this type!");
2625         case MVT::i8:  Opc = X86::INC8r; break;
2626         case MVT::i16: Opc = X86::INC16r; break;
2627         case MVT::i32: Opc = X86::INC32r; break;
2628         }
2629       } else if (CN->isAllOnesValue()) { // add X, -1 -> dec X
2630         switch (N.getValueType()) {
2631         default: assert(0 && "Cannot integer add this type!");
2632         case MVT::i8:  Opc = X86::DEC8r; break;
2633         case MVT::i16: Opc = X86::DEC16r; break;
2634         case MVT::i32: Opc = X86::DEC32r; break;
2635         }
2636       }
2637
2638       if (Opc) {
2639         Tmp1 = SelectExpr(Op0);
2640         BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
2641         return Result;
2642       }
2643
2644       switch (N.getValueType()) {
2645       default: assert(0 && "Cannot add this type!");
2646       case MVT::i8:  Opc = X86::ADD8ri; break;
2647       case MVT::i16: Opc = X86::ADD16ri; break;
2648       case MVT::i32: Opc = X86::ADD32ri; break;
2649       }
2650       if (Opc) {
2651         Tmp1 = SelectExpr(Op0);
2652         BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
2653         return Result;
2654       }
2655     }
2656
2657     switch (N.getValueType()) {
2658     default: assert(0 && "Cannot add this type!");
2659     case MVT::i8:  Opc = X86::ADD8rr; break;
2660     case MVT::i16: Opc = X86::ADD16rr; break;
2661     case MVT::i32: Opc = X86::ADD32rr; break;
2662     case MVT::f32: Opc = X86::ADDSSrr; break;
2663     case MVT::f64: Opc = X86ScalarSSE ? X86::ADDSDrr : X86::FpADD; break;
2664     }
2665
2666     if (getRegPressure(Op0) > getRegPressure(Op1)) {
2667       Tmp1 = SelectExpr(Op0);
2668       Tmp2 = SelectExpr(Op1);
2669     } else {
2670       Tmp2 = SelectExpr(Op1);
2671       Tmp1 = SelectExpr(Op0);
2672     }
2673
2674     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
2675     return Result;
2676
2677   case ISD::FSQRT:
2678     Tmp1 = SelectExpr(Node->getOperand(0));
2679     if (X86ScalarSSE) {
2680       Opc = (N.getValueType() == MVT::f32) ? X86::SQRTSSrr : X86::SQRTSDrr;
2681       BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
2682     } else {
2683       BuildMI(BB, X86::FSQRT, 1, Result).addReg(Tmp1);
2684     }
2685     return Result;
2686
2687   // FIXME:
2688   // Once we can spill 16 byte constants into the constant pool, we can
2689   // implement SSE equivalents of FABS and FCHS.
2690   case ISD::FABS:
2691   case ISD::FNEG:
2692   case ISD::FSIN:
2693   case ISD::FCOS:
2694     assert(N.getValueType()==MVT::f64 && "Illegal type for this operation");
2695     Tmp1 = SelectExpr(Node->getOperand(0));
2696     switch (N.getOpcode()) {
2697     default: assert(0 && "Unreachable!");
2698     case ISD::FABS: BuildMI(BB, X86::FABS, 1, Result).addReg(Tmp1); break;
2699     case ISD::FNEG: BuildMI(BB, X86::FCHS, 1, Result).addReg(Tmp1); break;
2700     case ISD::FSIN: BuildMI(BB, X86::FSIN, 1, Result).addReg(Tmp1); break;
2701     case ISD::FCOS: BuildMI(BB, X86::FCOS, 1, Result).addReg(Tmp1); break;
2702     }
2703     return Result;
2704
2705   case ISD::MULHU:
2706     switch (N.getValueType()) {
2707     default: assert(0 && "Unsupported VT!");
2708     case MVT::i8:  Tmp2 = X86::MUL8r;  break;
2709     case MVT::i16: Tmp2 = X86::MUL16r;  break;
2710     case MVT::i32: Tmp2 = X86::MUL32r;  break;
2711     }
2712     // FALL THROUGH
2713   case ISD::MULHS: {
2714     unsigned MovOpc, LowReg, HiReg;
2715     switch (N.getValueType()) {
2716     default: assert(0 && "Unsupported VT!");
2717     case MVT::i8:
2718       MovOpc = X86::MOV8rr;
2719       LowReg = X86::AL;
2720       HiReg = X86::AH;
2721       Opc = X86::IMUL8r;
2722       break;
2723     case MVT::i16:
2724       MovOpc = X86::MOV16rr;
2725       LowReg = X86::AX;
2726       HiReg = X86::DX;
2727       Opc = X86::IMUL16r;
2728       break;
2729     case MVT::i32:
2730       MovOpc = X86::MOV32rr;
2731       LowReg = X86::EAX;
2732       HiReg = X86::EDX;
2733       Opc = X86::IMUL32r;
2734       break;
2735     }
2736     if (Node->getOpcode() != ISD::MULHS)
2737       Opc = Tmp2;  // Get the MULHU opcode.
2738
2739     Op0 = Node->getOperand(0);
2740     Op1 = Node->getOperand(1);
2741     if (getRegPressure(Op0) > getRegPressure(Op1)) {
2742       Tmp1 = SelectExpr(Op0);
2743       Tmp2 = SelectExpr(Op1);
2744     } else {
2745       Tmp2 = SelectExpr(Op1);
2746       Tmp1 = SelectExpr(Op0);
2747     }
2748
2749     // FIXME: Implement folding of loads into the memory operands here!
2750     BuildMI(BB, MovOpc, 1, LowReg).addReg(Tmp1);
2751     BuildMI(BB, Opc, 1).addReg(Tmp2);
2752     BuildMI(BB, MovOpc, 1, Result).addReg(HiReg);
2753     return Result;
2754   }
2755
2756   case ISD::SUB:
2757   case ISD::MUL:
2758   case ISD::AND:
2759   case ISD::OR:
2760   case ISD::XOR: {
2761     static const unsigned SUBTab[] = {
2762       X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, 0,
2763       X86::SUB8rm, X86::SUB16rm, X86::SUB32rm, X86::FSUB32m, X86::FSUB64m,
2764       X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB  , X86::FpSUB,
2765     };
2766     static const unsigned SSE_SUBTab[] = {
2767       X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, 0,
2768       X86::SUB8rm, X86::SUB16rm, X86::SUB32rm, X86::SUBSSrm, X86::SUBSDrm,
2769       X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::SUBSSrr, X86::SUBSDrr,
2770     };
2771     static const unsigned MULTab[] = {
2772       0, X86::IMUL16rri, X86::IMUL32rri, 0, 0,
2773       0, X86::IMUL16rm , X86::IMUL32rm, X86::FMUL32m, X86::FMUL64m,
2774       0, X86::IMUL16rr , X86::IMUL32rr, X86::FpMUL  , X86::FpMUL,
2775     };
2776     static const unsigned SSE_MULTab[] = {
2777       0, X86::IMUL16rri, X86::IMUL32rri, 0, 0,
2778       0, X86::IMUL16rm , X86::IMUL32rm, X86::MULSSrm, X86::MULSDrm,
2779       0, X86::IMUL16rr , X86::IMUL32rr, X86::MULSSrr, X86::MULSDrr,
2780     };
2781     static const unsigned ANDTab[] = {
2782       X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, 0,
2783       X86::AND8rm, X86::AND16rm, X86::AND32rm, 0, 0,
2784       X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, 0,
2785     };
2786     static const unsigned ORTab[] = {
2787       X86::OR8ri, X86::OR16ri, X86::OR32ri, 0, 0,
2788       X86::OR8rm, X86::OR16rm, X86::OR32rm, 0, 0,
2789       X86::OR8rr, X86::OR16rr, X86::OR32rr, 0, 0,
2790     };
2791     static const unsigned XORTab[] = {
2792       X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, 0,
2793       X86::XOR8rm, X86::XOR16rm, X86::XOR32rm, 0, 0,
2794       X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, 0,
2795     };
2796
2797     Op0 = Node->getOperand(0);
2798     Op1 = Node->getOperand(1);
2799
2800     if (Node->getOpcode() == ISD::OR && Op0.hasOneUse() && Op1.hasOneUse())
2801       if (EmitOrOpOp(Op0, Op1, Result)) // Match SHLD, SHRD, and rotates.
2802         return Result;
2803
2804     if (Node->getOpcode() == ISD::SUB)
2805       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(0)))
2806         if (CN->isNullValue()) {   // 0 - N -> neg N
2807           switch (N.getValueType()) {
2808           default: assert(0 && "Cannot sub this type!");
2809           case MVT::i1:
2810           case MVT::i8:  Opc = X86::NEG8r;  break;
2811           case MVT::i16: Opc = X86::NEG16r; break;
2812           case MVT::i32: Opc = X86::NEG32r; break;
2813           }
2814           Tmp1 = SelectExpr(N.getOperand(1));
2815           BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
2816           return Result;
2817         }
2818
2819     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2820       if (CN->isAllOnesValue() && Node->getOpcode() == ISD::XOR) {
2821         Opc = 0;
2822         switch (N.getValueType()) {
2823         default: assert(0 && "Cannot add this type!");
2824         case MVT::i1:  break;  // Not supported, don't invert upper bits!
2825         case MVT::i8:  Opc = X86::NOT8r;  break;
2826         case MVT::i16: Opc = X86::NOT16r; break;
2827         case MVT::i32: Opc = X86::NOT32r; break;
2828         }
2829         if (Opc) {
2830           Tmp1 = SelectExpr(Op0);
2831           BuildMI(BB, Opc, 1, Result).addReg(Tmp1);
2832           return Result;
2833         }
2834       }
2835
2836       // Fold common multiplies into LEA instructions.
2837       if (Node->getOpcode() == ISD::MUL && N.getValueType() == MVT::i32) {
2838         switch ((int)CN->getValue()) {
2839         default: break;
2840         case 3:
2841         case 5:
2842         case 9:
2843           // Remove N from exprmap so SelectAddress doesn't get confused.
2844           ExprMap.erase(N);
2845           X86AddressMode AM;
2846           SelectAddress(N, AM);
2847           // Restore it to the map.
2848           ExprMap[N] = Result;
2849           addFullAddress(BuildMI(BB, X86::LEA32r, 4, Result), AM);
2850           return Result;
2851         }
2852       }
2853
2854       switch (N.getValueType()) {
2855       default: assert(0 && "Cannot xor this type!");
2856       case MVT::i1:
2857       case MVT::i8:  Opc = 0; break;
2858       case MVT::i16: Opc = 1; break;
2859       case MVT::i32: Opc = 2; break;
2860       }
2861       switch (Node->getOpcode()) {
2862       default: assert(0 && "Unreachable!");
2863       case ISD::SUB: Opc = X86ScalarSSE ? SSE_SUBTab[Opc] : SUBTab[Opc]; break;
2864       case ISD::MUL: Opc = X86ScalarSSE ? SSE_MULTab[Opc] : MULTab[Opc]; break;
2865       case ISD::AND: Opc = ANDTab[Opc]; break;
2866       case ISD::OR:  Opc =  ORTab[Opc]; break;
2867       case ISD::XOR: Opc = XORTab[Opc]; break;
2868       }
2869       if (Opc) {  // Can't fold MUL:i8 R, imm
2870         Tmp1 = SelectExpr(Op0);
2871         BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
2872         return Result;
2873       }
2874     }
2875
2876     if (isFoldableLoad(Op0, Op1, true))
2877       if (Node->getOpcode() != ISD::SUB) {
2878         std::swap(Op0, Op1);
2879         goto FoldOps;
2880       } else {
2881         // For FP, emit 'reverse' subract, with a memory operand.
2882         if (N.getValueType() == MVT::f64 && !X86ScalarSSE) {
2883           if (Op0.getOpcode() == ISD::EXTLOAD)
2884             Opc = X86::FSUBR32m;
2885           else
2886             Opc = X86::FSUBR64m;
2887
2888           X86AddressMode AM;
2889           EmitFoldedLoad(Op0, AM);
2890           Tmp1 = SelectExpr(Op1);
2891           addFullAddress(BuildMI(BB, Opc, 5, Result).addReg(Tmp1), AM);
2892           return Result;
2893         }
2894       }
2895
2896     if (isFoldableLoad(Op1, Op0, true)) {
2897     FoldOps:
2898       switch (N.getValueType()) {
2899       default: assert(0 && "Cannot operate on this type!");
2900       case MVT::i1:
2901       case MVT::i8:  Opc = 5; break;
2902       case MVT::i16: Opc = 6; break;
2903       case MVT::i32: Opc = 7; break;
2904       case MVT::f32: Opc = 8; break;
2905         // For F64, handle promoted load operations (from F32) as well!
2906       case MVT::f64:
2907         assert((!X86ScalarSSE || Op1.getOpcode() == ISD::LOAD) &&
2908                "SSE load should have been promoted");
2909         Opc = Op1.getOpcode() == ISD::LOAD ? 9 : 8; break;
2910       }
2911       switch (Node->getOpcode()) {
2912       default: assert(0 && "Unreachable!");
2913       case ISD::SUB: Opc = X86ScalarSSE ? SSE_SUBTab[Opc] : SUBTab[Opc]; break;
2914       case ISD::MUL: Opc = X86ScalarSSE ? SSE_MULTab[Opc] : MULTab[Opc]; break;
2915       case ISD::AND: Opc = ANDTab[Opc]; break;
2916       case ISD::OR:  Opc =  ORTab[Opc]; break;
2917       case ISD::XOR: Opc = XORTab[Opc]; break;
2918       }
2919
2920       X86AddressMode AM;
2921       EmitFoldedLoad(Op1, AM);
2922       Tmp1 = SelectExpr(Op0);
2923       if (Opc) {
2924         addFullAddress(BuildMI(BB, Opc, 5, Result).addReg(Tmp1), AM);
2925       } else {
2926         assert(Node->getOpcode() == ISD::MUL &&
2927                N.getValueType() == MVT::i8 && "Unexpected situation!");
2928         // Must use the MUL instruction, which forces use of AL.
2929         BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(Tmp1);
2930         addFullAddress(BuildMI(BB, X86::MUL8m, 1), AM);
2931         BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
2932       }
2933       return Result;
2934     }
2935
2936     if (getRegPressure(Op0) > getRegPressure(Op1)) {
2937       Tmp1 = SelectExpr(Op0);
2938       Tmp2 = SelectExpr(Op1);
2939     } else {
2940       Tmp2 = SelectExpr(Op1);
2941       Tmp1 = SelectExpr(Op0);
2942     }
2943
2944     switch (N.getValueType()) {
2945     default: assert(0 && "Cannot add this type!");
2946     case MVT::i1:
2947     case MVT::i8:  Opc = 10; break;
2948     case MVT::i16: Opc = 11; break;
2949     case MVT::i32: Opc = 12; break;
2950     case MVT::f32: Opc = 13; break;
2951     case MVT::f64: Opc = 14; break;
2952     }
2953     switch (Node->getOpcode()) {
2954     default: assert(0 && "Unreachable!");
2955     case ISD::SUB: Opc = X86ScalarSSE ? SSE_SUBTab[Opc] : SUBTab[Opc]; break;
2956     case ISD::MUL: Opc = X86ScalarSSE ? SSE_MULTab[Opc] : MULTab[Opc]; break;
2957     case ISD::AND: Opc = ANDTab[Opc]; break;
2958     case ISD::OR:  Opc =  ORTab[Opc]; break;
2959     case ISD::XOR: Opc = XORTab[Opc]; break;
2960     }
2961     if (Opc) {
2962       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
2963     } else {
2964       assert(Node->getOpcode() == ISD::MUL &&
2965              N.getValueType() == MVT::i8 && "Unexpected situation!");
2966       // Must use the MUL instruction, which forces use of AL.
2967       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(Tmp1);
2968       BuildMI(BB, X86::MUL8r, 1).addReg(Tmp2);
2969       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
2970     }
2971     return Result;
2972   }
2973   case ISD::ADD_PARTS:
2974   case ISD::SUB_PARTS: {
2975     assert(N.getNumOperands() == 4 && N.getValueType() == MVT::i32 &&
2976            "Not an i64 add/sub!");
2977     // Emit all of the operands.
2978     std::vector<unsigned> InVals;
2979     for (unsigned i = 0, e = N.getNumOperands(); i != e; ++i)
2980       InVals.push_back(SelectExpr(N.getOperand(i)));
2981     if (N.getOpcode() == ISD::ADD_PARTS) {
2982       BuildMI(BB, X86::ADD32rr, 2, Result).addReg(InVals[0]).addReg(InVals[2]);
2983       BuildMI(BB, X86::ADC32rr,2,Result+1).addReg(InVals[1]).addReg(InVals[3]);
2984     } else {
2985       BuildMI(BB, X86::SUB32rr, 2, Result).addReg(InVals[0]).addReg(InVals[2]);
2986       BuildMI(BB, X86::SBB32rr, 2,Result+1).addReg(InVals[1]).addReg(InVals[3]);
2987     }
2988     return Result+N.ResNo;
2989   }
2990
2991   case ISD::SHL_PARTS:
2992   case ISD::SRA_PARTS:
2993   case ISD::SRL_PARTS: {
2994     assert(N.getNumOperands() == 3 && N.getValueType() == MVT::i32 &&
2995            "Not an i64 shift!");
2996     unsigned ShiftOpLo = SelectExpr(N.getOperand(0));
2997     unsigned ShiftOpHi = SelectExpr(N.getOperand(1));
2998     unsigned TmpReg = MakeReg(MVT::i32);
2999     if (N.getOpcode() == ISD::SRA_PARTS) {
3000       // If this is a SHR of a Long, then we need to do funny sign extension
3001       // stuff.  TmpReg gets the value to use as the high-part if we are
3002       // shifting more than 32 bits.
3003       BuildMI(BB, X86::SAR32ri, 2, TmpReg).addReg(ShiftOpHi).addImm(31);
3004     } else {
3005       // Other shifts use a fixed zero value if the shift is more than 32 bits.
3006       BuildMI(BB, X86::MOV32ri, 1, TmpReg).addImm(0);
3007     }
3008
3009     // Initialize CL with the shift amount.
3010     unsigned ShiftAmountReg = SelectExpr(N.getOperand(2));
3011     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
3012
3013     unsigned TmpReg2 = MakeReg(MVT::i32);
3014     unsigned TmpReg3 = MakeReg(MVT::i32);
3015     if (N.getOpcode() == ISD::SHL_PARTS) {
3016       // TmpReg2 = shld inHi, inLo
3017       BuildMI(BB, X86::SHLD32rrCL, 2,TmpReg2).addReg(ShiftOpHi)
3018         .addReg(ShiftOpLo);
3019       // TmpReg3 = shl  inLo, CL
3020       BuildMI(BB, X86::SHL32rCL, 1, TmpReg3).addReg(ShiftOpLo);
3021
3022       // Set the flags to indicate whether the shift was by more than 32 bits.
3023       BuildMI(BB, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
3024
3025       // DestHi = (>32) ? TmpReg3 : TmpReg2;
3026       BuildMI(BB, X86::CMOVNE32rr, 2,
3027               Result+1).addReg(TmpReg2).addReg(TmpReg3);
3028       // DestLo = (>32) ? TmpReg : TmpReg3;
3029       BuildMI(BB, X86::CMOVNE32rr, 2,
3030               Result).addReg(TmpReg3).addReg(TmpReg);
3031     } else {
3032       // TmpReg2 = shrd inLo, inHi
3033       BuildMI(BB, X86::SHRD32rrCL,2,TmpReg2).addReg(ShiftOpLo)
3034         .addReg(ShiftOpHi);
3035       // TmpReg3 = s[ah]r  inHi, CL
3036       BuildMI(BB, N.getOpcode() == ISD::SRA_PARTS ? X86::SAR32rCL
3037                                                   : X86::SHR32rCL, 1, TmpReg3)
3038         .addReg(ShiftOpHi);
3039
3040       // Set the flags to indicate whether the shift was by more than 32 bits.
3041       BuildMI(BB, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
3042
3043       // DestLo = (>32) ? TmpReg3 : TmpReg2;
3044       BuildMI(BB, X86::CMOVNE32rr, 2,
3045               Result).addReg(TmpReg2).addReg(TmpReg3);
3046
3047       // DestHi = (>32) ? TmpReg : TmpReg3;
3048       BuildMI(BB, X86::CMOVNE32rr, 2,
3049               Result+1).addReg(TmpReg3).addReg(TmpReg);
3050     }
3051     return Result+N.ResNo;
3052   }
3053
3054   case ISD::SELECT:
3055     if (getRegPressure(N.getOperand(1)) > getRegPressure(N.getOperand(2))) {
3056       Tmp2 = SelectExpr(N.getOperand(1));
3057       Tmp3 = SelectExpr(N.getOperand(2));
3058     } else {
3059       Tmp3 = SelectExpr(N.getOperand(2));
3060       Tmp2 = SelectExpr(N.getOperand(1));
3061     }
3062     EmitSelectCC(N.getOperand(0), N.getValueType(), Tmp2, Tmp3, Result);
3063     return Result;
3064
3065   case ISD::SDIV:
3066   case ISD::UDIV:
3067   case ISD::SREM:
3068   case ISD::UREM: {
3069     assert((N.getOpcode() != ISD::SREM || MVT::isInteger(N.getValueType())) &&
3070            "We don't support this operator!");
3071
3072     if (N.getOpcode() == ISD::SDIV) {
3073       // We can fold loads into FpDIVs, but not really into any others.
3074       if (N.getValueType() == MVT::f64 && !X86ScalarSSE) {
3075         // Check for reversed and unreversed DIV.
3076         if (isFoldableLoad(N.getOperand(0), N.getOperand(1), true)) {
3077           if (N.getOperand(0).getOpcode() == ISD::EXTLOAD)
3078             Opc = X86::FDIVR32m;
3079           else
3080             Opc = X86::FDIVR64m;
3081           X86AddressMode AM;
3082           EmitFoldedLoad(N.getOperand(0), AM);
3083           Tmp1 = SelectExpr(N.getOperand(1));
3084           addFullAddress(BuildMI(BB, Opc, 5, Result).addReg(Tmp1), AM);
3085           return Result;
3086         } else if (isFoldableLoad(N.getOperand(1), N.getOperand(0), true) &&
3087                    N.getOperand(1).getOpcode() == ISD::LOAD) {
3088           if (N.getOperand(1).getOpcode() == ISD::EXTLOAD)
3089             Opc = X86::FDIV32m;
3090           else
3091             Opc = X86::FDIV64m;
3092           X86AddressMode AM;
3093           EmitFoldedLoad(N.getOperand(1), AM);
3094           Tmp1 = SelectExpr(N.getOperand(0));
3095           addFullAddress(BuildMI(BB, Opc, 5, Result).addReg(Tmp1), AM);
3096           return Result;
3097         }
3098       }
3099
3100       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
3101         // FIXME: These special cases should be handled by the lowering impl!
3102         unsigned RHS = CN->getValue();
3103         bool isNeg = false;
3104         if ((int)RHS < 0) {
3105           isNeg = true;
3106           RHS = -RHS;
3107         }
3108         if (RHS && (RHS & (RHS-1)) == 0) {   // Signed division by power of 2?
3109           unsigned Log = log2(RHS);
3110           unsigned SAROpc, SHROpc, ADDOpc, NEGOpc;
3111           switch (N.getValueType()) {
3112           default: assert("Unknown type to signed divide!");
3113           case MVT::i8:
3114             SAROpc = X86::SAR8ri;
3115             SHROpc = X86::SHR8ri;
3116             ADDOpc = X86::ADD8rr;
3117             NEGOpc = X86::NEG8r;
3118             break;
3119           case MVT::i16:
3120             SAROpc = X86::SAR16ri;
3121             SHROpc = X86::SHR16ri;
3122             ADDOpc = X86::ADD16rr;
3123             NEGOpc = X86::NEG16r;
3124             break;
3125           case MVT::i32:
3126             SAROpc = X86::SAR32ri;
3127             SHROpc = X86::SHR32ri;
3128             ADDOpc = X86::ADD32rr;
3129             NEGOpc = X86::NEG32r;
3130             break;
3131           }
3132           unsigned RegSize = MVT::getSizeInBits(N.getValueType());
3133           Tmp1 = SelectExpr(N.getOperand(0));
3134           unsigned TmpReg;
3135           if (Log != 1) {
3136             TmpReg = MakeReg(N.getValueType());
3137             BuildMI(BB, SAROpc, 2, TmpReg).addReg(Tmp1).addImm(Log-1);
3138           } else {
3139             TmpReg = Tmp1;
3140           }
3141           unsigned TmpReg2 = MakeReg(N.getValueType());
3142           BuildMI(BB, SHROpc, 2, TmpReg2).addReg(TmpReg).addImm(RegSize-Log);
3143           unsigned TmpReg3 = MakeReg(N.getValueType());
3144           BuildMI(BB, ADDOpc, 2, TmpReg3).addReg(Tmp1).addReg(TmpReg2);
3145
3146           unsigned TmpReg4 = isNeg ? MakeReg(N.getValueType()) : Result;
3147           BuildMI(BB, SAROpc, 2, TmpReg4).addReg(TmpReg3).addImm(Log);
3148           if (isNeg)
3149             BuildMI(BB, NEGOpc, 1, Result).addReg(TmpReg4);
3150           return Result;
3151         }
3152       }
3153     }
3154
3155     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3156       Tmp1 = SelectExpr(N.getOperand(0));
3157       Tmp2 = SelectExpr(N.getOperand(1));
3158     } else {
3159       Tmp2 = SelectExpr(N.getOperand(1));
3160       Tmp1 = SelectExpr(N.getOperand(0));
3161     }
3162
3163     bool isSigned = N.getOpcode() == ISD::SDIV || N.getOpcode() == ISD::SREM;
3164     bool isDiv    = N.getOpcode() == ISD::SDIV || N.getOpcode() == ISD::UDIV;
3165     unsigned LoReg, HiReg, DivOpcode, MovOpcode, ClrOpcode, SExtOpcode;
3166     switch (N.getValueType()) {
3167     default: assert(0 && "Cannot sdiv this type!");
3168     case MVT::i8:
3169       DivOpcode = isSigned ? X86::IDIV8r : X86::DIV8r;
3170       LoReg = X86::AL;
3171       HiReg = X86::AH;
3172       MovOpcode = X86::MOV8rr;
3173       ClrOpcode = X86::MOV8ri;
3174       SExtOpcode = X86::CBW;
3175       break;
3176     case MVT::i16:
3177       DivOpcode = isSigned ? X86::IDIV16r : X86::DIV16r;
3178       LoReg = X86::AX;
3179       HiReg = X86::DX;
3180       MovOpcode = X86::MOV16rr;
3181       ClrOpcode = X86::MOV16ri;
3182       SExtOpcode = X86::CWD;
3183       break;
3184     case MVT::i32:
3185       DivOpcode = isSigned ? X86::IDIV32r : X86::DIV32r;
3186       LoReg = X86::EAX;
3187       HiReg = X86::EDX;
3188       MovOpcode = X86::MOV32rr;
3189       ClrOpcode = X86::MOV32ri;
3190       SExtOpcode = X86::CDQ;
3191       break;
3192     case MVT::f32:
3193       BuildMI(BB, X86::DIVSSrr, 2, Result).addReg(Tmp1).addReg(Tmp2);
3194       return Result;
3195     case MVT::f64:
3196       Opc = X86ScalarSSE ? X86::DIVSDrr : X86::FpDIV;
3197       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
3198       return Result;
3199     }
3200
3201     // Set up the low part.
3202     BuildMI(BB, MovOpcode, 1, LoReg).addReg(Tmp1);
3203
3204     if (isSigned) {
3205       // Sign extend the low part into the high part.
3206       BuildMI(BB, SExtOpcode, 0);
3207     } else {
3208       // Zero out the high part, effectively zero extending the input.
3209       BuildMI(BB, ClrOpcode, 1, HiReg).addImm(0);
3210     }
3211
3212     // Emit the DIV/IDIV instruction.
3213     BuildMI(BB, DivOpcode, 1).addReg(Tmp2);
3214
3215     // Get the result of the divide or rem.
3216     BuildMI(BB, MovOpcode, 1, Result).addReg(isDiv ? LoReg : HiReg);
3217     return Result;
3218   }
3219
3220   case ISD::SHL:
3221     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
3222       if (CN->getValue() == 1) {   // X = SHL Y, 1  -> X = ADD Y, Y
3223         switch (N.getValueType()) {
3224         default: assert(0 && "Cannot shift this type!");
3225         case MVT::i8:  Opc = X86::ADD8rr; break;
3226         case MVT::i16: Opc = X86::ADD16rr; break;
3227         case MVT::i32: Opc = X86::ADD32rr; break;
3228         }
3229         Tmp1 = SelectExpr(N.getOperand(0));
3230         BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp1);
3231         return Result;
3232       }
3233
3234       switch (N.getValueType()) {
3235       default: assert(0 && "Cannot shift this type!");
3236       case MVT::i8:  Opc = X86::SHL8ri; break;
3237       case MVT::i16: Opc = X86::SHL16ri; break;
3238       case MVT::i32: Opc = X86::SHL32ri; break;
3239       }
3240       Tmp1 = SelectExpr(N.getOperand(0));
3241       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
3242       return Result;
3243     }
3244
3245     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3246       Tmp1 = SelectExpr(N.getOperand(0));
3247       Tmp2 = SelectExpr(N.getOperand(1));
3248     } else {
3249       Tmp2 = SelectExpr(N.getOperand(1));
3250       Tmp1 = SelectExpr(N.getOperand(0));
3251     }
3252
3253     switch (N.getValueType()) {
3254     default: assert(0 && "Cannot shift this type!");
3255     case MVT::i8 : Opc = X86::SHL8rCL; break;
3256     case MVT::i16: Opc = X86::SHL16rCL; break;
3257     case MVT::i32: Opc = X86::SHL32rCL; break;
3258     }
3259     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(Tmp2);
3260     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
3261     return Result;
3262   case ISD::SRL:
3263     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
3264       switch (N.getValueType()) {
3265       default: assert(0 && "Cannot shift this type!");
3266       case MVT::i8:  Opc = X86::SHR8ri; break;
3267       case MVT::i16: Opc = X86::SHR16ri; break;
3268       case MVT::i32: Opc = X86::SHR32ri; break;
3269       }
3270       Tmp1 = SelectExpr(N.getOperand(0));
3271       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
3272       return Result;
3273     }
3274
3275     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3276       Tmp1 = SelectExpr(N.getOperand(0));
3277       Tmp2 = SelectExpr(N.getOperand(1));
3278     } else {
3279       Tmp2 = SelectExpr(N.getOperand(1));
3280       Tmp1 = SelectExpr(N.getOperand(0));
3281     }
3282
3283     switch (N.getValueType()) {
3284     default: assert(0 && "Cannot shift this type!");
3285     case MVT::i8 : Opc = X86::SHR8rCL; break;
3286     case MVT::i16: Opc = X86::SHR16rCL; break;
3287     case MVT::i32: Opc = X86::SHR32rCL; break;
3288     }
3289     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(Tmp2);
3290     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
3291     return Result;
3292   case ISD::SRA:
3293     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
3294       switch (N.getValueType()) {
3295       default: assert(0 && "Cannot shift this type!");
3296       case MVT::i8:  Opc = X86::SAR8ri; break;
3297       case MVT::i16: Opc = X86::SAR16ri; break;
3298       case MVT::i32: Opc = X86::SAR32ri; break;
3299       }
3300       Tmp1 = SelectExpr(N.getOperand(0));
3301       BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addImm(CN->getValue());
3302       return Result;
3303     }
3304
3305     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3306       Tmp1 = SelectExpr(N.getOperand(0));
3307       Tmp2 = SelectExpr(N.getOperand(1));
3308     } else {
3309       Tmp2 = SelectExpr(N.getOperand(1));
3310       Tmp1 = SelectExpr(N.getOperand(0));
3311     }
3312
3313     switch (N.getValueType()) {
3314     default: assert(0 && "Cannot shift this type!");
3315     case MVT::i8 : Opc = X86::SAR8rCL; break;
3316     case MVT::i16: Opc = X86::SAR16rCL; break;
3317     case MVT::i32: Opc = X86::SAR32rCL; break;
3318     }
3319     BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(Tmp2);
3320     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
3321     return Result;
3322
3323   case ISD::SETCC:
3324     EmitCMP(N.getOperand(0), N.getOperand(1), Node->hasOneUse());
3325     EmitSetCC(BB, Result, cast<SetCCSDNode>(N)->getCondition(),
3326               MVT::isFloatingPoint(N.getOperand(1).getValueType()));
3327     return Result;
3328   case ISD::LOAD:
3329     // Make sure we generate both values.
3330     if (Result != 1) {  // Generate the token
3331       if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second)
3332         assert(0 && "Load already emitted!?");
3333     } else
3334       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
3335
3336     switch (Node->getValueType(0)) {
3337     default: assert(0 && "Cannot load this type!");
3338     case MVT::i1:
3339     case MVT::i8:  Opc = X86::MOV8rm; break;
3340     case MVT::i16: Opc = X86::MOV16rm; break;
3341     case MVT::i32: Opc = X86::MOV32rm; break;
3342     case MVT::f32: Opc = X86::MOVSSrm; break;
3343     case MVT::f64:
3344       if (X86ScalarSSE) {
3345         Opc = X86::MOVSDrm;
3346       } else {
3347         Opc = X86::FLD64m;
3348         ContainsFPCode = true;
3349       }
3350       break;
3351     }
3352
3353     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N.getOperand(1))){
3354       Select(N.getOperand(0));
3355       addConstantPoolReference(BuildMI(BB, Opc, 4, Result), CP->getIndex());
3356     } else {
3357       X86AddressMode AM;
3358
3359       SDOperand Chain   = N.getOperand(0);
3360       SDOperand Address = N.getOperand(1);
3361       if (getRegPressure(Chain) > getRegPressure(Address)) {
3362         Select(Chain);
3363         SelectAddress(Address, AM);
3364       } else {
3365         SelectAddress(Address, AM);
3366         Select(Chain);
3367       }
3368
3369       addFullAddress(BuildMI(BB, Opc, 4, Result), AM);
3370     }
3371     return Result;
3372   case X86ISD::FILD64m:
3373     // Make sure we generate both values.
3374     assert(Result != 1 && N.getValueType() == MVT::f64);
3375     if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second)
3376       assert(0 && "Load already emitted!?");
3377
3378     {
3379       X86AddressMode AM;
3380
3381       SDOperand Chain   = N.getOperand(0);
3382       SDOperand Address = N.getOperand(1);
3383       if (getRegPressure(Chain) > getRegPressure(Address)) {
3384         Select(Chain);
3385         SelectAddress(Address, AM);
3386       } else {
3387         SelectAddress(Address, AM);
3388         Select(Chain);
3389       }
3390
3391       addFullAddress(BuildMI(BB, X86::FILD64m, 4, Result), AM);
3392     }
3393     return Result;
3394
3395   case ISD::EXTLOAD:          // Arbitrarily codegen extloads as MOVZX*
3396   case ISD::ZEXTLOAD: {
3397     // Make sure we generate both values.
3398     if (Result != 1)
3399       ExprMap[N.getValue(1)] = 1;   // Generate the token
3400     else
3401       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
3402
3403     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N.getOperand(1)))
3404       if (Node->getValueType(0) == MVT::f64) {
3405         assert(cast<VTSDNode>(Node->getOperand(3))->getVT() == MVT::f32 &&
3406                "Bad EXTLOAD!");
3407         addConstantPoolReference(BuildMI(BB, X86::FLD32m, 4, Result),
3408                                  CP->getIndex());
3409         return Result;
3410       }
3411
3412     X86AddressMode AM;
3413     if (getRegPressure(Node->getOperand(0)) >
3414            getRegPressure(Node->getOperand(1))) {
3415       Select(Node->getOperand(0)); // chain
3416       SelectAddress(Node->getOperand(1), AM);
3417     } else {
3418       SelectAddress(Node->getOperand(1), AM);
3419       Select(Node->getOperand(0)); // chain
3420     }
3421
3422     switch (Node->getValueType(0)) {
3423     default: assert(0 && "Unknown type to sign extend to.");
3424     case MVT::f64:
3425       assert(cast<VTSDNode>(Node->getOperand(3))->getVT() == MVT::f32 &&
3426              "Bad EXTLOAD!");
3427       addFullAddress(BuildMI(BB, X86::FLD32m, 5, Result), AM);
3428       break;
3429     case MVT::i32:
3430       switch (cast<VTSDNode>(Node->getOperand(3))->getVT()) {
3431       default:
3432         assert(0 && "Bad zero extend!");
3433       case MVT::i1:
3434       case MVT::i8:
3435         addFullAddress(BuildMI(BB, X86::MOVZX32rm8, 5, Result), AM);
3436         break;
3437       case MVT::i16:
3438         addFullAddress(BuildMI(BB, X86::MOVZX32rm16, 5, Result), AM);
3439         break;
3440       }
3441       break;
3442     case MVT::i16:
3443       assert(cast<VTSDNode>(Node->getOperand(3))->getVT() <= MVT::i8 &&
3444              "Bad zero extend!");
3445       addFullAddress(BuildMI(BB, X86::MOVSX16rm8, 5, Result), AM);
3446       break;
3447     case MVT::i8:
3448       assert(cast<VTSDNode>(Node->getOperand(3))->getVT() == MVT::i1 &&
3449              "Bad zero extend!");
3450       addFullAddress(BuildMI(BB, X86::MOV8rm, 5, Result), AM);
3451       break;
3452     }
3453     return Result;
3454   }
3455   case ISD::SEXTLOAD: {
3456     // Make sure we generate both values.
3457     if (Result != 1)
3458       ExprMap[N.getValue(1)] = 1;   // Generate the token
3459     else
3460       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
3461
3462     X86AddressMode AM;
3463     if (getRegPressure(Node->getOperand(0)) >
3464            getRegPressure(Node->getOperand(1))) {
3465       Select(Node->getOperand(0)); // chain
3466       SelectAddress(Node->getOperand(1), AM);
3467     } else {
3468       SelectAddress(Node->getOperand(1), AM);
3469       Select(Node->getOperand(0)); // chain
3470     }
3471
3472     switch (Node->getValueType(0)) {
3473     case MVT::i8: assert(0 && "Cannot sign extend from bool!");
3474     default: assert(0 && "Unknown type to sign extend to.");
3475     case MVT::i32:
3476       switch (cast<VTSDNode>(Node->getOperand(3))->getVT()) {
3477       default:
3478       case MVT::i1: assert(0 && "Cannot sign extend from bool!");
3479       case MVT::i8:
3480         addFullAddress(BuildMI(BB, X86::MOVSX32rm8, 5, Result), AM);
3481         break;
3482       case MVT::i16:
3483         addFullAddress(BuildMI(BB, X86::MOVSX32rm16, 5, Result), AM);
3484         break;
3485       }
3486       break;
3487     case MVT::i16:
3488       assert(cast<VTSDNode>(Node->getOperand(3))->getVT() == MVT::i8 &&
3489              "Cannot sign extend from bool!");
3490       addFullAddress(BuildMI(BB, X86::MOVSX16rm8, 5, Result), AM);
3491       break;
3492     }
3493     return Result;
3494   }
3495
3496   case ISD::DYNAMIC_STACKALLOC:
3497     // Generate both result values.
3498     if (Result != 1)
3499       ExprMap[N.getValue(1)] = 1;   // Generate the token
3500     else
3501       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
3502
3503     // FIXME: We are currently ignoring the requested alignment for handling
3504     // greater than the stack alignment.  This will need to be revisited at some
3505     // point.  Align = N.getOperand(2);
3506
3507     if (!isa<ConstantSDNode>(N.getOperand(2)) ||
3508         cast<ConstantSDNode>(N.getOperand(2))->getValue() != 0) {
3509       std::cerr << "Cannot allocate stack object with greater alignment than"
3510                 << " the stack alignment yet!";
3511       abort();
3512     }
3513
3514     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
3515       Select(N.getOperand(0));
3516       BuildMI(BB, X86::SUB32ri, 2, X86::ESP).addReg(X86::ESP)
3517         .addImm(CN->getValue());
3518     } else {
3519       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3520         Select(N.getOperand(0));
3521         Tmp1 = SelectExpr(N.getOperand(1));
3522       } else {
3523         Tmp1 = SelectExpr(N.getOperand(1));
3524         Select(N.getOperand(0));
3525       }
3526
3527       // Subtract size from stack pointer, thereby allocating some space.
3528       BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(Tmp1);
3529     }
3530
3531     // Put a pointer to the space into the result register, by copying the stack
3532     // pointer.
3533     BuildMI(BB, X86::MOV32rr, 1, Result).addReg(X86::ESP);
3534     return Result;
3535
3536   case X86ISD::TAILCALL:
3537   case X86ISD::CALL: {
3538     // The chain for this call is now lowered.
3539     ExprMap.insert(std::make_pair(N.getValue(0), 1));
3540
3541     bool isDirect = isa<GlobalAddressSDNode>(N.getOperand(1)) ||
3542                     isa<ExternalSymbolSDNode>(N.getOperand(1));
3543     unsigned Callee = 0;
3544     if (isDirect) {
3545       Select(N.getOperand(0));
3546     } else {
3547       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3548         Select(N.getOperand(0));
3549         Callee = SelectExpr(N.getOperand(1));
3550       } else {
3551         Callee = SelectExpr(N.getOperand(1));
3552         Select(N.getOperand(0));
3553       }
3554     }
3555
3556     // If this call has values to pass in registers, do so now.
3557     if (Node->getNumOperands() > 4) {
3558       // The first value is passed in (a part of) EAX, the second in EDX.
3559       unsigned RegOp1 = SelectExpr(N.getOperand(4));
3560       unsigned RegOp2 =
3561         Node->getNumOperands() > 5 ? SelectExpr(N.getOperand(5)) : 0;
3562
3563       switch (N.getOperand(4).getValueType()) {
3564       default: assert(0 && "Bad thing to pass in regs");
3565       case MVT::i1:
3566       case MVT::i8:  BuildMI(BB, X86::MOV8rr , 1,X86::AL).addReg(RegOp1); break;
3567       case MVT::i16: BuildMI(BB, X86::MOV16rr, 1,X86::AX).addReg(RegOp1); break;
3568       case MVT::i32: BuildMI(BB, X86::MOV32rr, 1,X86::EAX).addReg(RegOp1);break;
3569       }
3570       if (RegOp2)
3571         switch (N.getOperand(5).getValueType()) {
3572         default: assert(0 && "Bad thing to pass in regs");
3573         case MVT::i1:
3574         case MVT::i8:
3575           BuildMI(BB, X86::MOV8rr , 1, X86::DL).addReg(RegOp2);
3576           break;
3577         case MVT::i16:
3578           BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(RegOp2);
3579           break;
3580         case MVT::i32:
3581           BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RegOp2);
3582           break;
3583         }
3584     }
3585
3586     if (GlobalAddressSDNode *GASD =
3587                dyn_cast<GlobalAddressSDNode>(N.getOperand(1))) {
3588       BuildMI(BB, X86::CALLpcrel32, 1).addGlobalAddress(GASD->getGlobal(),true);
3589     } else if (ExternalSymbolSDNode *ESSDN =
3590                dyn_cast<ExternalSymbolSDNode>(N.getOperand(1))) {
3591       BuildMI(BB, X86::CALLpcrel32,
3592               1).addExternalSymbol(ESSDN->getSymbol(), true);
3593     } else {
3594       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
3595         Select(N.getOperand(0));
3596         Tmp1 = SelectExpr(N.getOperand(1));
3597       } else {
3598         Tmp1 = SelectExpr(N.getOperand(1));
3599         Select(N.getOperand(0));
3600       }
3601
3602       BuildMI(BB, X86::CALL32r, 1).addReg(Tmp1);
3603     }
3604
3605     // Get caller stack amount and amount the callee added to the stack pointer.
3606     Tmp1 = cast<ConstantSDNode>(N.getOperand(2))->getValue();
3607     Tmp2 = cast<ConstantSDNode>(N.getOperand(3))->getValue();
3608     BuildMI(BB, X86::ADJCALLSTACKUP, 2).addImm(Tmp1).addImm(Tmp2);
3609
3610     if (Node->getNumValues() != 1)
3611       switch (Node->getValueType(1)) {
3612       default: assert(0 && "Unknown value type for call result!");
3613       case MVT::Other: return 1;
3614       case MVT::i1:
3615       case MVT::i8:
3616         BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
3617         break;
3618       case MVT::i16:
3619         BuildMI(BB, X86::MOV16rr, 1, Result).addReg(X86::AX);
3620         break;
3621       case MVT::i32:
3622         BuildMI(BB, X86::MOV32rr, 1, Result).addReg(X86::EAX);
3623         if (Node->getNumValues() == 3 && Node->getValueType(2) == MVT::i32)
3624           BuildMI(BB, X86::MOV32rr, 1, Result+1).addReg(X86::EDX);
3625         break;
3626       case MVT::f64:     // Floating-point return values live in %ST(0)
3627         if (X86ScalarSSE) {
3628           ContainsFPCode = true;
3629           BuildMI(BB, X86::FpGETRESULT, 1, X86::FP0);
3630
3631           unsigned Size = MVT::getSizeInBits(MVT::f64)/8;
3632           MachineFunction *F = BB->getParent();
3633           int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
3634           addFrameReference(BuildMI(BB, X86::FST64m, 5), FrameIdx).addReg(X86::FP0);
3635           addFrameReference(BuildMI(BB, X86::MOVSDrm, 4, Result), FrameIdx);
3636           break;
3637         } else {
3638           ContainsFPCode = true;
3639           BuildMI(BB, X86::FpGETRESULT, 1, Result);
3640           break;
3641         }
3642       }
3643     return Result+N.ResNo-1;
3644   }
3645   case ISD::READPORT:
3646     // First, determine that the size of the operand falls within the acceptable
3647     // range for this architecture.
3648     //
3649     if (Node->getOperand(1).getValueType() != MVT::i16) {
3650       std::cerr << "llvm.readport: Address size is not 16 bits\n";
3651       exit(1);
3652     }
3653
3654     // Make sure we generate both values.
3655     if (Result != 1) {  // Generate the token
3656       if (!ExprMap.insert(std::make_pair(N.getValue(1), 1)).second)
3657         assert(0 && "readport already emitted!?");
3658     } else
3659       Result = ExprMap[N.getValue(0)] = MakeReg(N.getValue(0).getValueType());
3660
3661     Select(Node->getOperand(0));  // Select the chain.
3662
3663     // If the port is a single-byte constant, use the immediate form.
3664     if (ConstantSDNode *Port = dyn_cast<ConstantSDNode>(Node->getOperand(1)))
3665       if ((Port->getValue() & 255) == Port->getValue()) {
3666         switch (Node->getValueType(0)) {
3667         case MVT::i8:
3668           BuildMI(BB, X86::IN8ri, 1).addImm(Port->getValue());
3669           BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
3670           return Result;
3671         case MVT::i16:
3672           BuildMI(BB, X86::IN16ri, 1).addImm(Port->getValue());
3673           BuildMI(BB, X86::MOV16rr, 1, Result).addReg(X86::AX);
3674           return Result;
3675         case MVT::i32:
3676           BuildMI(BB, X86::IN32ri, 1).addImm(Port->getValue());
3677           BuildMI(BB, X86::MOV32rr, 1, Result).addReg(X86::EAX);
3678           return Result;
3679         default: break;
3680         }
3681       }
3682
3683     // Now, move the I/O port address into the DX register and use the IN
3684     // instruction to get the input data.
3685     //
3686     Tmp1 = SelectExpr(Node->getOperand(1));
3687     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Tmp1);
3688     switch (Node->getValueType(0)) {
3689     case MVT::i8:
3690       BuildMI(BB, X86::IN8rr, 0);
3691       BuildMI(BB, X86::MOV8rr, 1, Result).addReg(X86::AL);
3692       return Result;
3693     case MVT::i16:
3694       BuildMI(BB, X86::IN16rr, 0);
3695       BuildMI(BB, X86::MOV16rr, 1, Result).addReg(X86::AX);
3696       return Result;
3697     case MVT::i32:
3698       BuildMI(BB, X86::IN32rr, 0);
3699       BuildMI(BB, X86::MOV32rr, 1, Result).addReg(X86::EAX);
3700       return Result;
3701     default:
3702       std::cerr << "Cannot do input on this data type";
3703       exit(1);
3704     }
3705
3706   }
3707
3708   return 0;
3709 }
3710
3711 /// TryToFoldLoadOpStore - Given a store node, try to fold together a
3712 /// load/op/store instruction.  If successful return true.
3713 bool ISel::TryToFoldLoadOpStore(SDNode *Node) {
3714   assert(Node->getOpcode() == ISD::STORE && "Can only do this for stores!");
3715   SDOperand Chain  = Node->getOperand(0);
3716   SDOperand StVal  = Node->getOperand(1);
3717   SDOperand StPtr  = Node->getOperand(2);
3718
3719   // The chain has to be a load, the stored value must be an integer binary
3720   // operation with one use.
3721   if (!StVal.Val->hasOneUse() || StVal.Val->getNumOperands() != 2 ||
3722       MVT::isFloatingPoint(StVal.getValueType()))
3723     return false;
3724
3725   // Token chain must either be a factor node or the load to fold.
3726   if (Chain.getOpcode() != ISD::LOAD && Chain.getOpcode() != ISD::TokenFactor)
3727     return false;
3728
3729   SDOperand TheLoad;
3730
3731   // Check to see if there is a load from the same pointer that we're storing
3732   // to in either operand of the binop.
3733   if (StVal.getOperand(0).getOpcode() == ISD::LOAD &&
3734       StVal.getOperand(0).getOperand(1) == StPtr)
3735     TheLoad = StVal.getOperand(0);
3736   else if (StVal.getOperand(1).getOpcode() == ISD::LOAD &&
3737            StVal.getOperand(1).getOperand(1) == StPtr)
3738     TheLoad = StVal.getOperand(1);
3739   else
3740     return false;  // No matching load operand.
3741
3742   // We can only fold the load if there are no intervening side-effecting
3743   // operations.  This means that the store uses the load as its token chain, or
3744   // there are only token factor nodes in between the store and load.
3745   if (Chain != TheLoad.getValue(1)) {
3746     // Okay, the other option is that we have a store referring to (possibly
3747     // nested) token factor nodes.  For now, just try peeking through one level
3748     // of token factors to see if this is the case.
3749     bool ChainOk = false;
3750     if (Chain.getOpcode() == ISD::TokenFactor) {
3751       for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
3752         if (Chain.getOperand(i) == TheLoad.getValue(1)) {
3753           ChainOk = true;
3754           break;
3755         }
3756     }
3757
3758     if (!ChainOk) return false;
3759   }
3760
3761   if (TheLoad.getOperand(1) != StPtr)
3762     return false;
3763
3764   // Make sure that one of the operands of the binop is the load, and that the
3765   // load folds into the binop.
3766   if (((StVal.getOperand(0) != TheLoad ||
3767         !isFoldableLoad(TheLoad, StVal.getOperand(1))) &&
3768        (StVal.getOperand(1) != TheLoad ||
3769         !isFoldableLoad(TheLoad, StVal.getOperand(0)))))
3770     return false;
3771
3772   // Finally, check to see if this is one of the ops we can handle!
3773   static const unsigned ADDTAB[] = {
3774     X86::ADD8mi, X86::ADD16mi, X86::ADD32mi,
3775     X86::ADD8mr, X86::ADD16mr, X86::ADD32mr,
3776   };
3777   static const unsigned SUBTAB[] = {
3778     X86::SUB8mi, X86::SUB16mi, X86::SUB32mi,
3779     X86::SUB8mr, X86::SUB16mr, X86::SUB32mr,
3780   };
3781   static const unsigned ANDTAB[] = {
3782     X86::AND8mi, X86::AND16mi, X86::AND32mi,
3783     X86::AND8mr, X86::AND16mr, X86::AND32mr,
3784   };
3785   static const unsigned ORTAB[] = {
3786     X86::OR8mi, X86::OR16mi, X86::OR32mi,
3787     X86::OR8mr, X86::OR16mr, X86::OR32mr,
3788   };
3789   static const unsigned XORTAB[] = {
3790     X86::XOR8mi, X86::XOR16mi, X86::XOR32mi,
3791     X86::XOR8mr, X86::XOR16mr, X86::XOR32mr,
3792   };
3793   static const unsigned SHLTAB[] = {
3794     X86::SHL8mi, X86::SHL16mi, X86::SHL32mi,
3795     /*Have to put the reg in CL*/0, 0, 0,
3796   };
3797   static const unsigned SARTAB[] = {
3798     X86::SAR8mi, X86::SAR16mi, X86::SAR32mi,
3799     /*Have to put the reg in CL*/0, 0, 0,
3800   };
3801   static const unsigned SHRTAB[] = {
3802     X86::SHR8mi, X86::SHR16mi, X86::SHR32mi,
3803     /*Have to put the reg in CL*/0, 0, 0,
3804   };
3805
3806   const unsigned *TabPtr = 0;
3807   switch (StVal.getOpcode()) {
3808   default:
3809     std::cerr << "CANNOT [mem] op= val: ";
3810     StVal.Val->dump(); std::cerr << "\n";
3811   case ISD::MUL:
3812   case ISD::SDIV:
3813   case ISD::UDIV:
3814   case ISD::SREM:
3815   case ISD::UREM: return false;
3816
3817   case ISD::ADD: TabPtr = ADDTAB; break;
3818   case ISD::SUB: TabPtr = SUBTAB; break;
3819   case ISD::AND: TabPtr = ANDTAB; break;
3820   case ISD:: OR: TabPtr =  ORTAB; break;
3821   case ISD::XOR: TabPtr = XORTAB; break;
3822   case ISD::SHL: TabPtr = SHLTAB; break;
3823   case ISD::SRA: TabPtr = SARTAB; break;
3824   case ISD::SRL: TabPtr = SHRTAB; break;
3825   }
3826
3827   // Handle: [mem] op= CST
3828   SDOperand Op0 = StVal.getOperand(0);
3829   SDOperand Op1 = StVal.getOperand(1);
3830   unsigned Opc = 0;
3831   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
3832     switch (Op0.getValueType()) { // Use Op0's type because of shifts.
3833     default: break;
3834     case MVT::i1:
3835     case MVT::i8:  Opc = TabPtr[0]; break;
3836     case MVT::i16: Opc = TabPtr[1]; break;
3837     case MVT::i32: Opc = TabPtr[2]; break;
3838     }
3839
3840     if (Opc) {
3841       if (!ExprMap.insert(std::make_pair(TheLoad.getValue(1), 1)).second)
3842         assert(0 && "Already emitted?");
3843       Select(Chain);
3844
3845       X86AddressMode AM;
3846       if (getRegPressure(TheLoad.getOperand(0)) >
3847           getRegPressure(TheLoad.getOperand(1))) {
3848         Select(TheLoad.getOperand(0));
3849         SelectAddress(TheLoad.getOperand(1), AM);
3850       } else {
3851         SelectAddress(TheLoad.getOperand(1), AM);
3852         Select(TheLoad.getOperand(0));
3853       }
3854
3855       if (StVal.getOpcode() == ISD::ADD) {
3856         if (CN->getValue() == 1) {
3857           switch (Op0.getValueType()) {
3858           default: break;
3859           case MVT::i8:
3860             addFullAddress(BuildMI(BB, X86::INC8m, 4), AM);
3861             return true;
3862           case MVT::i16: Opc = TabPtr[1];
3863             addFullAddress(BuildMI(BB, X86::INC16m, 4), AM);
3864             return true;
3865           case MVT::i32: Opc = TabPtr[2];
3866             addFullAddress(BuildMI(BB, X86::INC32m, 4), AM);
3867             return true;
3868           }
3869         } else if (CN->getValue()+1 == 0) {   // [X] += -1 -> DEC [X]
3870           switch (Op0.getValueType()) {
3871           default: break;
3872           case MVT::i8:
3873             addFullAddress(BuildMI(BB, X86::DEC8m, 4), AM);
3874             return true;
3875           case MVT::i16: Opc = TabPtr[1];
3876             addFullAddress(BuildMI(BB, X86::DEC16m, 4), AM);
3877             return true;
3878           case MVT::i32: Opc = TabPtr[2];
3879             addFullAddress(BuildMI(BB, X86::DEC32m, 4), AM);
3880             return true;
3881           }
3882         }
3883       }
3884
3885       addFullAddress(BuildMI(BB, Opc, 4+1),AM).addImm(CN->getValue());
3886       return true;
3887     }
3888   }
3889
3890   // If we have [mem] = V op [mem], try to turn it into:
3891   // [mem] = [mem] op V.
3892   if (Op1 == TheLoad && StVal.getOpcode() != ISD::SUB &&
3893       StVal.getOpcode() != ISD::SHL && StVal.getOpcode() != ISD::SRA &&
3894       StVal.getOpcode() != ISD::SRL)
3895     std::swap(Op0, Op1);
3896
3897   if (Op0 != TheLoad) return false;
3898
3899   switch (Op0.getValueType()) {
3900   default: return false;
3901   case MVT::i1:
3902   case MVT::i8:  Opc = TabPtr[3]; break;
3903   case MVT::i16: Opc = TabPtr[4]; break;
3904   case MVT::i32: Opc = TabPtr[5]; break;
3905   }
3906
3907   // Table entry doesn't exist?
3908   if (Opc == 0) return false;
3909
3910   if (!ExprMap.insert(std::make_pair(TheLoad.getValue(1), 1)).second)
3911     assert(0 && "Already emitted?");
3912   Select(Chain);
3913   Select(TheLoad.getOperand(0));
3914
3915   X86AddressMode AM;
3916   SelectAddress(TheLoad.getOperand(1), AM);
3917   unsigned Reg = SelectExpr(Op1);
3918   addFullAddress(BuildMI(BB, Opc, 4+1), AM).addReg(Reg);
3919   return true;
3920 }
3921
3922 /// If node is a ret(tailcall) node, emit the specified tail call and return
3923 /// true, otherwise return false.
3924 ///
3925 /// FIXME: This whole thing should be a post-legalize optimization pass which
3926 /// recognizes and transforms the dag.  We don't want the selection phase doing
3927 /// this stuff!!
3928 ///
3929 bool ISel::EmitPotentialTailCall(SDNode *RetNode) {
3930   assert(RetNode->getOpcode() == ISD::RET && "Not a return");
3931
3932   SDOperand Chain = RetNode->getOperand(0);
3933
3934   // If this is a token factor node where one operand is a call, dig into it.
3935   SDOperand TokFactor;
3936   unsigned TokFactorOperand = 0;
3937   if (Chain.getOpcode() == ISD::TokenFactor) {
3938     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
3939       if (Chain.getOperand(i).getOpcode() == ISD::CALLSEQ_END ||
3940           Chain.getOperand(i).getOpcode() == X86ISD::TAILCALL) {
3941         TokFactorOperand = i;
3942         TokFactor = Chain;
3943         Chain = Chain.getOperand(i);
3944         break;
3945       }
3946     if (TokFactor.Val == 0) return false;  // No call operand.
3947   }
3948
3949   // Skip the CALLSEQ_END node if present.
3950   if (Chain.getOpcode() == ISD::CALLSEQ_END)
3951     Chain = Chain.getOperand(0);
3952
3953   // Is a tailcall the last control operation that occurs before the return?
3954   if (Chain.getOpcode() != X86ISD::TAILCALL)
3955     return false;
3956
3957   // If we return a value, is it the value produced by the call?
3958   if (RetNode->getNumOperands() > 1) {
3959     // Not returning the ret val of the call?
3960     if (Chain.Val->getNumValues() == 1 ||
3961         RetNode->getOperand(1) != Chain.getValue(1))
3962       return false;
3963
3964     if (RetNode->getNumOperands() > 2) {
3965       if (Chain.Val->getNumValues() == 2 ||
3966           RetNode->getOperand(2) != Chain.getValue(2))
3967         return false;
3968     }
3969     assert(RetNode->getNumOperands() <= 3);
3970   }
3971
3972   // CalleeCallArgAmt - The total number of bytes used for the callee arg area.
3973   // For FastCC, this will always be > 0.
3974   unsigned CalleeCallArgAmt =
3975     cast<ConstantSDNode>(Chain.getOperand(2))->getValue();
3976
3977   // CalleeCallArgPopAmt - The number of bytes in the call area popped by the
3978   // callee.  For FastCC this will always be > 0, for CCC this is always 0.
3979   unsigned CalleeCallArgPopAmt =
3980     cast<ConstantSDNode>(Chain.getOperand(3))->getValue();
3981
3982   // There are several cases we can handle here.  First, if the caller and
3983   // callee are both CCC functions, we can tailcall if the callee takes <= the
3984   // number of argument bytes that the caller does.
3985   if (CalleeCallArgPopAmt == 0 &&                  // Callee is C CallingConv?
3986       X86Lowering.getBytesToPopOnReturn() == 0) {  // Caller is C CallingConv?
3987     // Check to see if caller arg area size >= callee arg area size.
3988     if (X86Lowering.getBytesCallerReserves() >= CalleeCallArgAmt) {
3989       //std::cerr << "CCC TAILCALL UNIMP!\n";
3990       // If TokFactor is non-null, emit all operands.
3991
3992       //EmitCCCToCCCTailCall(Chain.Val);
3993       //return true;
3994     }
3995     return false;
3996   }
3997
3998   // Second, if both are FastCC functions, we can always perform the tail call.
3999   if (CalleeCallArgPopAmt && X86Lowering.getBytesToPopOnReturn()) {
4000     // If TokFactor is non-null, emit all operands before the call.
4001     if (TokFactor.Val) {
4002       for (unsigned i = 0, e = TokFactor.getNumOperands(); i != e; ++i)
4003         if (i != TokFactorOperand)
4004           Select(TokFactor.getOperand(i));
4005     }
4006
4007     EmitFastCCToFastCCTailCall(Chain.Val);
4008     return true;
4009   }
4010
4011   // We don't support mixed calls, due to issues with alignment.  We could in
4012   // theory handle some mixed calls from CCC -> FastCC if the stack is properly
4013   // aligned (which depends on the number of arguments to the callee).  TODO.
4014   return false;
4015 }
4016
4017 static SDOperand GetAdjustedArgumentStores(SDOperand Chain, int Offset,
4018                                            SelectionDAG &DAG) {
4019   MVT::ValueType StoreVT;
4020   switch (Chain.getOpcode()) {
4021   case ISD::CALLSEQ_START:
4022     // If we found the start of the call sequence, we're done.  We actually
4023     // strip off the CALLSEQ_START node, to avoid generating the
4024     // ADJCALLSTACKDOWN marker for the tail call.
4025     return Chain.getOperand(0);
4026   case ISD::TokenFactor: {
4027     std::vector<SDOperand> Ops;
4028     Ops.reserve(Chain.getNumOperands());
4029     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
4030       Ops.push_back(GetAdjustedArgumentStores(Chain.getOperand(i), Offset,DAG));
4031     return DAG.getNode(ISD::TokenFactor, MVT::Other, Ops);
4032   }
4033   case ISD::STORE:       // Normal store
4034     StoreVT = Chain.getOperand(1).getValueType();
4035     break;
4036   case ISD::TRUNCSTORE:  // FLOAT store
4037     StoreVT = cast<VTSDNode>(Chain.getOperand(4))->getVT();
4038     break;
4039   }
4040
4041   SDOperand OrigDest = Chain.getOperand(2);
4042   unsigned OrigOffset;
4043
4044   if (OrigDest.getOpcode() == ISD::CopyFromReg) {
4045     OrigOffset = 0;
4046     assert(cast<RegSDNode>(OrigDest)->getReg() == X86::ESP);
4047   } else {
4048     // We expect only (ESP+C)
4049     assert(OrigDest.getOpcode() == ISD::ADD &&
4050            isa<ConstantSDNode>(OrigDest.getOperand(1)) &&
4051            OrigDest.getOperand(0).getOpcode() == ISD::CopyFromReg &&
4052            cast<RegSDNode>(OrigDest.getOperand(0))->getReg() == X86::ESP);
4053     OrigOffset = cast<ConstantSDNode>(OrigDest.getOperand(1))->getValue();
4054   }
4055
4056   // Compute the new offset from the incoming ESP value we wish to use.
4057   unsigned NewOffset = OrigOffset + Offset;
4058
4059   unsigned OpSize = (MVT::getSizeInBits(StoreVT)+7)/8;  // Bits -> Bytes
4060   MachineFunction &MF = DAG.getMachineFunction();
4061   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, NewOffset);
4062   SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
4063
4064   SDOperand InChain = GetAdjustedArgumentStores(Chain.getOperand(0), Offset,
4065                                                 DAG);
4066   if (Chain.getOpcode() == ISD::STORE)
4067     return DAG.getNode(ISD::STORE, MVT::Other, InChain, Chain.getOperand(1),
4068                        FIN);
4069   assert(Chain.getOpcode() == ISD::TRUNCSTORE);
4070   return DAG.getNode(ISD::TRUNCSTORE, MVT::Other, InChain, Chain.getOperand(1),
4071                      FIN, DAG.getSrcValue(NULL), DAG.getValueType(StoreVT));
4072 }
4073
4074
4075 /// EmitFastCCToFastCCTailCall - Given a tailcall in the tail position to a
4076 /// fastcc function from a fastcc function, emit the code to emit a 'proper'
4077 /// tail call.
4078 void ISel::EmitFastCCToFastCCTailCall(SDNode *TailCallNode) {
4079   unsigned CalleeCallArgSize =
4080     cast<ConstantSDNode>(TailCallNode->getOperand(2))->getValue();
4081   unsigned CallerArgSize = X86Lowering.getBytesToPopOnReturn();
4082
4083   //std::cerr << "****\n*** EMITTING TAIL CALL!\n****\n";
4084
4085   // Adjust argument stores.  Instead of storing to [ESP], f.e., store to frame
4086   // indexes that are relative to the incoming ESP.  If the incoming and
4087   // outgoing arg sizes are the same we will store to [InESP] instead of
4088   // [CurESP] and the ESP referenced will be relative to the incoming function
4089   // ESP.
4090   int ESPOffset = CallerArgSize-CalleeCallArgSize;
4091   SDOperand AdjustedArgStores =
4092     GetAdjustedArgumentStores(TailCallNode->getOperand(0), ESPOffset, *TheDAG);
4093
4094   // Copy the return address of the caller into a virtual register so we don't
4095   // clobber it.
4096   SDOperand RetVal;
4097   if (ESPOffset) {
4098     SDOperand RetValAddr = X86Lowering.getReturnAddressFrameIndex(*TheDAG);
4099     RetVal = TheDAG->getLoad(MVT::i32, TheDAG->getEntryNode(),
4100                                        RetValAddr, TheDAG->getSrcValue(NULL));
4101     SelectExpr(RetVal);
4102   }
4103
4104   // Codegen all of the argument stores.
4105   Select(AdjustedArgStores);
4106
4107   if (RetVal.Val) {
4108     // Emit a store of the saved ret value to the new location.
4109     MachineFunction &MF = TheDAG->getMachineFunction();
4110     int ReturnAddrFI = MF.getFrameInfo()->CreateFixedObject(4, ESPOffset-4);
4111     SDOperand RetValAddr = TheDAG->getFrameIndex(ReturnAddrFI, MVT::i32);
4112     Select(TheDAG->getNode(ISD::STORE, MVT::Other, TheDAG->getEntryNode(),
4113                            RetVal, RetValAddr));
4114   }
4115
4116   // Get the destination value.
4117   SDOperand Callee = TailCallNode->getOperand(1);
4118   bool isDirect = isa<GlobalAddressSDNode>(Callee) ||
4119                   isa<ExternalSymbolSDNode>(Callee);
4120   unsigned CalleeReg = 0;
4121   if (!isDirect) CalleeReg = SelectExpr(Callee);
4122
4123   unsigned RegOp1 = 0;
4124   unsigned RegOp2 = 0;
4125
4126   if (TailCallNode->getNumOperands() > 4) {
4127     // The first value is passed in (a part of) EAX, the second in EDX.
4128     RegOp1 = SelectExpr(TailCallNode->getOperand(4));
4129     if (TailCallNode->getNumOperands() > 5)
4130       RegOp2 = SelectExpr(TailCallNode->getOperand(5));
4131
4132     switch (TailCallNode->getOperand(4).getValueType()) {
4133     default: assert(0 && "Bad thing to pass in regs");
4134     case MVT::i1:
4135     case MVT::i8:
4136       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(RegOp1);
4137       RegOp1 = X86::AL;
4138       break;
4139     case MVT::i16:
4140       BuildMI(BB, X86::MOV16rr, 1,X86::AX).addReg(RegOp1);
4141       RegOp1 = X86::AX;
4142       break;
4143     case MVT::i32:
4144       BuildMI(BB, X86::MOV32rr, 1,X86::EAX).addReg(RegOp1);
4145       RegOp1 = X86::EAX;
4146       break;
4147     }
4148     if (RegOp2)
4149       switch (TailCallNode->getOperand(5).getValueType()) {
4150       default: assert(0 && "Bad thing to pass in regs");
4151       case MVT::i1:
4152       case MVT::i8:
4153         BuildMI(BB, X86::MOV8rr, 1, X86::DL).addReg(RegOp2);
4154         RegOp2 = X86::DL;
4155         break;
4156       case MVT::i16:
4157         BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(RegOp2);
4158         RegOp2 = X86::DX;
4159         break;
4160       case MVT::i32:
4161         BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RegOp2);
4162         RegOp2 = X86::EDX;
4163         break;
4164       }
4165   }
4166
4167   // Adjust ESP.
4168   if (ESPOffset)
4169     BuildMI(BB, X86::ADJSTACKPTRri, 2,
4170             X86::ESP).addReg(X86::ESP).addImm(ESPOffset);
4171
4172   // TODO: handle jmp [mem]
4173   if (!isDirect) {
4174     BuildMI(BB, X86::TAILJMPr, 1).addReg(CalleeReg);
4175   } else if (GlobalAddressSDNode *GASD = dyn_cast<GlobalAddressSDNode>(Callee)){
4176     BuildMI(BB, X86::TAILJMPd, 1).addGlobalAddress(GASD->getGlobal(), true);
4177   } else {
4178     ExternalSymbolSDNode *ESSDN = cast<ExternalSymbolSDNode>(Callee);
4179     BuildMI(BB, X86::TAILJMPd, 1).addExternalSymbol(ESSDN->getSymbol(), true);
4180   }
4181   // ADD IMPLICIT USE RegOp1/RegOp2's
4182 }
4183
4184
4185 void ISel::Select(SDOperand N) {
4186   unsigned Tmp1, Tmp2, Opc;
4187
4188   if (!ExprMap.insert(std::make_pair(N, 1)).second)
4189     return;  // Already selected.
4190
4191   SDNode *Node = N.Val;
4192
4193   switch (Node->getOpcode()) {
4194   default:
4195     Node->dump(); std::cerr << "\n";
4196     assert(0 && "Node not handled yet!");
4197   case ISD::EntryToken: return;  // Noop
4198   case ISD::TokenFactor:
4199     if (Node->getNumOperands() == 2) {
4200       bool OneFirst =
4201         getRegPressure(Node->getOperand(1))>getRegPressure(Node->getOperand(0));
4202       Select(Node->getOperand(OneFirst));
4203       Select(Node->getOperand(!OneFirst));
4204     } else {
4205       std::vector<std::pair<unsigned, unsigned> > OpsP;
4206       for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
4207         OpsP.push_back(std::make_pair(getRegPressure(Node->getOperand(i)), i));
4208       std::sort(OpsP.begin(), OpsP.end());
4209       std::reverse(OpsP.begin(), OpsP.end());
4210       for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
4211         Select(Node->getOperand(OpsP[i].second));
4212     }
4213     return;
4214   case ISD::CopyToReg:
4215     if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
4216       Select(N.getOperand(0));
4217       Tmp1 = SelectExpr(N.getOperand(1));
4218     } else {
4219       Tmp1 = SelectExpr(N.getOperand(1));
4220       Select(N.getOperand(0));
4221     }
4222     Tmp2 = cast<RegSDNode>(N)->getReg();
4223
4224     if (Tmp1 != Tmp2) {
4225       switch (N.getOperand(1).getValueType()) {
4226       default: assert(0 && "Invalid type for operation!");
4227       case MVT::i1:
4228       case MVT::i8:  Opc = X86::MOV8rr; break;
4229       case MVT::i16: Opc = X86::MOV16rr; break;
4230       case MVT::i32: Opc = X86::MOV32rr; break;
4231       case MVT::f32: Opc = X86::MOVAPSrr; break;
4232       case MVT::f64:
4233         if (X86ScalarSSE) {
4234           Opc = X86::MOVAPDrr;
4235         } else {
4236           Opc = X86::FpMOV;
4237           ContainsFPCode = true;
4238         }
4239         break;
4240       }
4241       BuildMI(BB, Opc, 1, Tmp2).addReg(Tmp1);
4242     }
4243     return;
4244   case ISD::RET:
4245     if (N.getOperand(0).getOpcode() == ISD::CALLSEQ_END ||
4246         N.getOperand(0).getOpcode() == X86ISD::TAILCALL ||
4247         N.getOperand(0).getOpcode() == ISD::TokenFactor)
4248       if (EmitPotentialTailCall(Node))
4249         return;
4250
4251     switch (N.getNumOperands()) {
4252     default:
4253       assert(0 && "Unknown return instruction!");
4254     case 3:
4255       assert(N.getOperand(1).getValueType() == MVT::i32 &&
4256              N.getOperand(2).getValueType() == MVT::i32 &&
4257              "Unknown two-register value!");
4258       if (getRegPressure(N.getOperand(1)) > getRegPressure(N.getOperand(2))) {
4259         Tmp1 = SelectExpr(N.getOperand(1));
4260         Tmp2 = SelectExpr(N.getOperand(2));
4261       } else {
4262         Tmp2 = SelectExpr(N.getOperand(2));
4263         Tmp1 = SelectExpr(N.getOperand(1));
4264       }
4265       Select(N.getOperand(0));
4266
4267       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Tmp1);
4268       BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(Tmp2);
4269       break;
4270     case 2:
4271       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
4272         Select(N.getOperand(0));
4273         Tmp1 = SelectExpr(N.getOperand(1));
4274       } else {
4275         Tmp1 = SelectExpr(N.getOperand(1));
4276         Select(N.getOperand(0));
4277       }
4278       switch (N.getOperand(1).getValueType()) {
4279       default: assert(0 && "All other types should have been promoted!!");
4280       case MVT::f32:
4281         if (X86ScalarSSE) {
4282           // Spill the value to memory and reload it into top of stack.
4283           unsigned Size = MVT::getSizeInBits(MVT::f32)/8;
4284           MachineFunction *F = BB->getParent();
4285           int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
4286           addFrameReference(BuildMI(BB, X86::MOVSSmr, 5), FrameIdx).addReg(Tmp1);
4287           addFrameReference(BuildMI(BB, X86::FLD32m, 4, X86::FP0), FrameIdx);
4288           BuildMI(BB, X86::FpSETRESULT, 1).addReg(X86::FP0);
4289           ContainsFPCode = true;
4290         } else {
4291           assert(0 && "MVT::f32 only legal with scalar sse fp");
4292           abort();
4293         }
4294         break;
4295       case MVT::f64:
4296         if (X86ScalarSSE) {
4297           // Spill the value to memory and reload it into top of stack.
4298           unsigned Size = MVT::getSizeInBits(MVT::f64)/8;
4299           MachineFunction *F = BB->getParent();
4300           int FrameIdx = F->getFrameInfo()->CreateStackObject(Size, Size);
4301           addFrameReference(BuildMI(BB, X86::MOVSDmr, 5), FrameIdx).addReg(Tmp1);
4302           addFrameReference(BuildMI(BB, X86::FLD64m, 4, X86::FP0), FrameIdx);
4303           BuildMI(BB, X86::FpSETRESULT, 1).addReg(X86::FP0);
4304           ContainsFPCode = true;
4305         } else {
4306           BuildMI(BB, X86::FpSETRESULT, 1).addReg(Tmp1);
4307         }
4308         break;
4309       case MVT::i32:
4310         BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Tmp1);
4311         break;
4312       }
4313       break;
4314     case 1:
4315       Select(N.getOperand(0));
4316       break;
4317     }
4318     if (X86Lowering.getBytesToPopOnReturn() == 0)
4319       BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
4320     else
4321       BuildMI(BB, X86::RETI, 1).addImm(X86Lowering.getBytesToPopOnReturn());
4322     return;
4323   case ISD::BR: {
4324     Select(N.getOperand(0));
4325     MachineBasicBlock *Dest =
4326       cast<BasicBlockSDNode>(N.getOperand(1))->getBasicBlock();
4327     BuildMI(BB, X86::JMP, 1).addMBB(Dest);
4328     return;
4329   }
4330
4331   case ISD::BRCOND: {
4332     MachineBasicBlock *Dest =
4333       cast<BasicBlockSDNode>(N.getOperand(2))->getBasicBlock();
4334
4335     // Try to fold a setcc into the branch.  If this fails, emit a test/jne
4336     // pair.
4337     if (EmitBranchCC(Dest, N.getOperand(0), N.getOperand(1))) {
4338       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {
4339         Select(N.getOperand(0));
4340         Tmp1 = SelectExpr(N.getOperand(1));
4341       } else {
4342         Tmp1 = SelectExpr(N.getOperand(1));
4343         Select(N.getOperand(0));
4344       }
4345       BuildMI(BB, X86::TEST8rr, 2).addReg(Tmp1).addReg(Tmp1);
4346       BuildMI(BB, X86::JNE, 1).addMBB(Dest);
4347     }
4348
4349     return;
4350   }
4351
4352   case ISD::LOAD:
4353     // If this load could be folded into the only using instruction, and if it
4354     // is safe to emit the instruction here, try to do so now.
4355     if (Node->hasNUsesOfValue(1, 0)) {
4356       SDOperand TheVal = N.getValue(0);
4357       SDNode *User = 0;
4358       for (SDNode::use_iterator UI = Node->use_begin(); ; ++UI) {
4359         assert(UI != Node->use_end() && "Didn't find use!");
4360         SDNode *UN = *UI;
4361         for (unsigned i = 0, e = UN->getNumOperands(); i != e; ++i)
4362           if (UN->getOperand(i) == TheVal) {
4363             User = UN;
4364             goto FoundIt;
4365           }
4366       }
4367     FoundIt:
4368       // Only handle unary operators right now.
4369       if (User->getNumOperands() == 1) {
4370         ExprMap.erase(N);
4371         SelectExpr(SDOperand(User, 0));
4372         return;
4373       }
4374     }
4375     ExprMap.erase(N);
4376     SelectExpr(N);
4377     return;
4378   case ISD::READPORT:
4379   case ISD::EXTLOAD:
4380   case ISD::SEXTLOAD:
4381   case ISD::ZEXTLOAD:
4382   case ISD::DYNAMIC_STACKALLOC:
4383   case X86ISD::TAILCALL:
4384   case X86ISD::CALL:
4385     ExprMap.erase(N);
4386     SelectExpr(N);
4387     return;
4388   case ISD::CopyFromReg:
4389   case X86ISD::FILD64m:
4390     ExprMap.erase(N);
4391     SelectExpr(N.getValue(0));
4392     return;
4393
4394   case ISD::TRUNCSTORE: {  // truncstore chain, val, ptr, SRCVALUE, storety
4395     X86AddressMode AM;
4396     MVT::ValueType StoredTy = cast<VTSDNode>(N.getOperand(4))->getVT();
4397     assert((StoredTy == MVT::i1 || StoredTy == MVT::f32 ||
4398             StoredTy == MVT::i16 /*FIXME: THIS IS JUST FOR TESTING!*/)
4399            && "Unsupported TRUNCSTORE for this target!");
4400
4401     if (StoredTy == MVT::i16) {
4402       // FIXME: This is here just to allow testing.  X86 doesn't really have a
4403       // TRUNCSTORE i16 operation, but this is required for targets that do not
4404       // have 16-bit integer registers.  We occasionally disable 16-bit integer
4405       // registers to test the promotion code.
4406       Select(N.getOperand(0));
4407       Tmp1 = SelectExpr(N.getOperand(1));
4408       SelectAddress(N.getOperand(2), AM);
4409
4410       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Tmp1);
4411       addFullAddress(BuildMI(BB, X86::MOV16mr, 5), AM).addReg(X86::AX);
4412       return;
4413     }
4414
4415     // Store of constant bool?
4416     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
4417       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(2))) {
4418         Select(N.getOperand(0));
4419         SelectAddress(N.getOperand(2), AM);
4420       } else {
4421         SelectAddress(N.getOperand(2), AM);
4422         Select(N.getOperand(0));
4423       }
4424       addFullAddress(BuildMI(BB, X86::MOV8mi, 5), AM).addImm(CN->getValue());
4425       return;
4426     }
4427
4428     switch (StoredTy) {
4429     default: assert(0 && "Cannot truncstore this type!");
4430     case MVT::i1: Opc = X86::MOV8mr; break;
4431     case MVT::f32:
4432       assert(!X86ScalarSSE && "Cannot truncstore scalar SSE regs");
4433       Opc = X86::FST32m; break;
4434     }
4435
4436     std::vector<std::pair<unsigned, unsigned> > RP;
4437     RP.push_back(std::make_pair(getRegPressure(N.getOperand(0)), 0));
4438     RP.push_back(std::make_pair(getRegPressure(N.getOperand(1)), 1));
4439     RP.push_back(std::make_pair(getRegPressure(N.getOperand(2)), 2));
4440     std::sort(RP.begin(), RP.end());
4441
4442     Tmp1 = 0;   // Silence a warning.
4443     for (unsigned i = 0; i != 3; ++i)
4444       switch (RP[2-i].second) {
4445       default: assert(0 && "Unknown operand number!");
4446       case 0: Select(N.getOperand(0)); break;
4447       case 1: Tmp1 = SelectExpr(N.getOperand(1)); break;
4448       case 2: SelectAddress(N.getOperand(2), AM); break;
4449       }
4450
4451     addFullAddress(BuildMI(BB, Opc, 4+1), AM).addReg(Tmp1);
4452     return;
4453   }
4454   case ISD::STORE: {
4455     X86AddressMode AM;
4456
4457     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
4458       Opc = 0;
4459       switch (CN->getValueType(0)) {
4460       default: assert(0 && "Invalid type for operation!");
4461       case MVT::i1:
4462       case MVT::i8:  Opc = X86::MOV8mi; break;
4463       case MVT::i16: Opc = X86::MOV16mi; break;
4464       case MVT::i32: Opc = X86::MOV32mi; break;
4465       }
4466       if (Opc) {
4467         if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(2))) {
4468           Select(N.getOperand(0));
4469           SelectAddress(N.getOperand(2), AM);
4470         } else {
4471           SelectAddress(N.getOperand(2), AM);
4472           Select(N.getOperand(0));
4473         }
4474         addFullAddress(BuildMI(BB, Opc, 4+1), AM).addImm(CN->getValue());
4475         return;
4476       }
4477     } else if (GlobalAddressSDNode *GA =
4478                       dyn_cast<GlobalAddressSDNode>(N.getOperand(1))) {
4479       assert(GA->getValueType(0) == MVT::i32 && "Bad pointer operand");
4480
4481       if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(2))) {
4482         Select(N.getOperand(0));
4483         SelectAddress(N.getOperand(2), AM);
4484       } else {
4485         SelectAddress(N.getOperand(2), AM);
4486         Select(N.getOperand(0));
4487       }
4488       addFullAddress(BuildMI(BB, X86::MOV32mi, 4+1),
4489                      AM).addGlobalAddress(GA->getGlobal());
4490       return;
4491     }
4492
4493     // Check to see if this is a load/op/store combination.
4494     if (TryToFoldLoadOpStore(Node))
4495       return;
4496
4497     switch (N.getOperand(1).getValueType()) {
4498     default: assert(0 && "Cannot store this type!");
4499     case MVT::i1:
4500     case MVT::i8:  Opc = X86::MOV8mr; break;
4501     case MVT::i16: Opc = X86::MOV16mr; break;
4502     case MVT::i32: Opc = X86::MOV32mr; break;
4503     case MVT::f32: Opc = X86::MOVSSmr; break;
4504     case MVT::f64: Opc = X86ScalarSSE ? X86::MOVSDmr : X86::FST64m; break;
4505     }
4506
4507     std::vector<std::pair<unsigned, unsigned> > RP;
4508     RP.push_back(std::make_pair(getRegPressure(N.getOperand(0)), 0));
4509     RP.push_back(std::make_pair(getRegPressure(N.getOperand(1)), 1));
4510     RP.push_back(std::make_pair(getRegPressure(N.getOperand(2)), 2));
4511     std::sort(RP.begin(), RP.end());
4512
4513     Tmp1 = 0; // Silence a warning.
4514     for (unsigned i = 0; i != 3; ++i)
4515       switch (RP[2-i].second) {
4516       default: assert(0 && "Unknown operand number!");
4517       case 0: Select(N.getOperand(0)); break;
4518       case 1: Tmp1 = SelectExpr(N.getOperand(1)); break;
4519       case 2: SelectAddress(N.getOperand(2), AM); break;
4520       }
4521
4522     addFullAddress(BuildMI(BB, Opc, 4+1), AM).addReg(Tmp1);
4523     return;
4524   }
4525   case ISD::CALLSEQ_START:
4526     Select(N.getOperand(0));
4527     // Stack amount
4528     Tmp1 = cast<ConstantSDNode>(N.getOperand(1))->getValue();
4529     BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(Tmp1);
4530     return;
4531   case ISD::CALLSEQ_END:
4532     Select(N.getOperand(0));
4533     return;
4534   case ISD::MEMSET: {
4535     Select(N.getOperand(0));  // Select the chain.
4536     unsigned Align =
4537       (unsigned)cast<ConstantSDNode>(Node->getOperand(4))->getValue();
4538     if (Align == 0) Align = 1;
4539
4540     // Turn the byte code into # iterations
4541     unsigned CountReg;
4542     unsigned Opcode;
4543     if (ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Node->getOperand(2))) {
4544       unsigned Val = ValC->getValue() & 255;
4545
4546       // If the value is a constant, then we can potentially use larger sets.
4547       switch (Align & 3) {
4548       case 2:   // WORD aligned
4549         CountReg = MakeReg(MVT::i32);
4550         if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Node->getOperand(3))) {
4551           BuildMI(BB, X86::MOV32ri, 1, CountReg).addImm(I->getValue()/2);
4552         } else {
4553           unsigned ByteReg = SelectExpr(Node->getOperand(3));
4554           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
4555         }
4556         BuildMI(BB, X86::MOV16ri, 1, X86::AX).addImm((Val << 8) | Val);
4557         Opcode = X86::REP_STOSW;
4558         break;
4559       case 0:   // DWORD aligned
4560         CountReg = MakeReg(MVT::i32);
4561         if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Node->getOperand(3))) {
4562           BuildMI(BB, X86::MOV32ri, 1, CountReg).addImm(I->getValue()/4);
4563         } else {
4564           unsigned ByteReg = SelectExpr(Node->getOperand(3));
4565           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
4566         }
4567         Val = (Val << 8) | Val;
4568         BuildMI(BB, X86::MOV32ri, 1, X86::EAX).addImm((Val << 16) | Val);
4569         Opcode = X86::REP_STOSD;
4570         break;
4571       default:  // BYTE aligned
4572         CountReg = SelectExpr(Node->getOperand(3));
4573         BuildMI(BB, X86::MOV8ri, 1, X86::AL).addImm(Val);
4574         Opcode = X86::REP_STOSB;
4575         break;
4576       }
4577     } else {
4578       // If it's not a constant value we are storing, just fall back.  We could
4579       // try to be clever to form 16 bit and 32 bit values, but we don't yet.
4580       unsigned ValReg = SelectExpr(Node->getOperand(2));
4581       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
4582       CountReg = SelectExpr(Node->getOperand(3));
4583       Opcode = X86::REP_STOSB;
4584     }
4585
4586     // No matter what the alignment is, we put the source in ESI, the
4587     // destination in EDI, and the count in ECX.
4588     unsigned TmpReg1 = SelectExpr(Node->getOperand(1));
4589     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
4590     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
4591     BuildMI(BB, Opcode, 0);
4592     return;
4593   }
4594   case ISD::MEMCPY: {
4595     Select(N.getOperand(0));  // Select the chain.
4596     unsigned Align =
4597       (unsigned)cast<ConstantSDNode>(Node->getOperand(4))->getValue();
4598     if (Align == 0) Align = 1;
4599
4600     // Turn the byte code into # iterations
4601     unsigned CountReg;
4602     unsigned Opcode;
4603     switch (Align & 3) {
4604     case 2:   // WORD aligned
4605       CountReg = MakeReg(MVT::i32);
4606       if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Node->getOperand(3))) {
4607         BuildMI(BB, X86::MOV32ri, 1, CountReg).addImm(I->getValue()/2);
4608       } else {
4609         unsigned ByteReg = SelectExpr(Node->getOperand(3));
4610         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
4611       }
4612       Opcode = X86::REP_MOVSW;
4613       break;
4614     case 0:   // DWORD aligned
4615       CountReg = MakeReg(MVT::i32);
4616       if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Node->getOperand(3))) {
4617         BuildMI(BB, X86::MOV32ri, 1, CountReg).addImm(I->getValue()/4);
4618       } else {
4619         unsigned ByteReg = SelectExpr(Node->getOperand(3));
4620         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
4621       }
4622       Opcode = X86::REP_MOVSD;
4623       break;
4624     default:  // BYTE aligned
4625       CountReg = SelectExpr(Node->getOperand(3));
4626       Opcode = X86::REP_MOVSB;
4627       break;
4628     }
4629
4630     // No matter what the alignment is, we put the source in ESI, the
4631     // destination in EDI, and the count in ECX.
4632     unsigned TmpReg1 = SelectExpr(Node->getOperand(1));
4633     unsigned TmpReg2 = SelectExpr(Node->getOperand(2));
4634     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
4635     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
4636     BuildMI(BB, X86::MOV32rr, 1, X86::ESI).addReg(TmpReg2);
4637     BuildMI(BB, Opcode, 0);
4638     return;
4639   }
4640   case ISD::WRITEPORT:
4641     if (Node->getOperand(2).getValueType() != MVT::i16) {
4642       std::cerr << "llvm.writeport: Address size is not 16 bits\n";
4643       exit(1);
4644     }
4645     Select(Node->getOperand(0)); // Emit the chain.
4646
4647     Tmp1 = SelectExpr(Node->getOperand(1));
4648     switch (Node->getOperand(1).getValueType()) {
4649     case MVT::i8:
4650       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(Tmp1);
4651       Tmp2 = X86::OUT8ir;  Opc = X86::OUT8rr;
4652       break;
4653     case MVT::i16:
4654       BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(Tmp1);
4655       Tmp2 = X86::OUT16ir; Opc = X86::OUT16rr;
4656       break;
4657     case MVT::i32:
4658       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Tmp1);
4659       Tmp2 = X86::OUT32ir; Opc = X86::OUT32rr;
4660       break;
4661     default:
4662       std::cerr << "llvm.writeport: invalid data type for X86 target";
4663       exit(1);
4664     }
4665
4666     // If the port is a single-byte constant, use the immediate form.
4667     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node->getOperand(2)))
4668       if ((CN->getValue() & 255) == CN->getValue()) {
4669         BuildMI(BB, Tmp2, 1).addImm(CN->getValue());
4670         return;
4671       }
4672
4673     // Otherwise, move the I/O port address into the DX register.
4674     unsigned Reg = SelectExpr(Node->getOperand(2));
4675     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
4676     BuildMI(BB, Opc, 0);
4677     return;
4678   }
4679   assert(0 && "Should not be reached!");
4680 }
4681
4682
4683 /// createX86PatternInstructionSelector - This pass converts an LLVM function
4684 /// into a machine code representation using pattern matching and a machine
4685 /// description file.
4686 ///
4687 FunctionPass *llvm::createX86PatternInstructionSelector(TargetMachine &TM) {
4688   return new ISel(TM);
4689 }