lib/Target/X86/X86ISelSimple.cpp

   1 //===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by the LLVM research group and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a simple peephole instruction selector for the x86 target
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "X86.h"
  15 #include "X86InstrBuilder.h"
  16 #include "X86InstrInfo.h"
  17 #include "llvm/Constants.h"
  18 #include "llvm/DerivedTypes.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Instructions.h"
  21 #include "llvm/IntrinsicLowering.h"
  22 #include "llvm/Pass.h"
  23 #include "llvm/CodeGen/MachineConstantPool.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/SSARegMap.h"
  27 #include "llvm/Target/MRegisterInfo.h"
  28 #include "llvm/Target/TargetMachine.h"
  29 #include "llvm/Support/GetElementPtrTypeIterator.h"
  30 #include "llvm/Support/InstVisitor.h"
  31 #include "llvm/Support/CFG.h"
  32 #include "Support/Statistic.h"
  33 using namespace llvm;
  34
  35 namespace {
  36   Statistic<>
  37   NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
  38
  39   /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
  40   /// Representation.
  41   ///
  42   enum TypeClass {
  43     cByte, cShort, cInt, cFP, cLong
  44   };
  45 }
  46
  47 /// getClass - Turn a primitive type into a "class" number which is based on the
  48 /// size of the type, and whether or not it is floating point.
  49 ///
  50 static inline TypeClass getClass(const Type *Ty) {
  51   switch (Ty->getPrimitiveID()) {
  52   case Type::SByteTyID:
  53   case Type::UByteTyID:   return cByte;      // Byte operands are class #0
  54   case Type::ShortTyID:
  55   case Type::UShortTyID:  return cShort;     // Short operands are class #1
  56   case Type::IntTyID:
  57   case Type::UIntTyID:
  58   case Type::PointerTyID: return cInt;       // Int's and pointers are class #2
  59
  60   case Type::FloatTyID:
  61   case Type::DoubleTyID:  return cFP;        // Floating Point is #3
  62
  63   case Type::LongTyID:
  64   case Type::ULongTyID:   return cLong;      // Longs are class #4
  65   default:
  66     assert(0 && "Invalid type to getClass!");
  67     return cByte;  // not reached
  68   }
  69 }
  70
  71 // getClassB - Just like getClass, but treat boolean values as bytes.
  72 static inline TypeClass getClassB(const Type *Ty) {
  73   if (Ty == Type::BoolTy) return cByte;
  74   return getClass(Ty);
  75 }
  76
  77 namespace {
  78   struct ISel : public FunctionPass, InstVisitor<ISel> {
  79     TargetMachine &TM;
  80     MachineFunction *F;                 // The function we are compiling into
  81     MachineBasicBlock *BB;              // The current MBB we are compiling
  82     int VarArgsFrameIndex;              // FrameIndex for start of varargs area
  83     int ReturnAddressIndex;             // FrameIndex for the return address
  84
  85     std::map<Value*, unsigned> RegMap;  // Mapping between Val's and SSA Regs
  86
  87     // MBBMap - Mapping between LLVM BB -> Machine BB
  88     std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
  89
  90     ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}
  91
  92     /// runOnFunction - Top level implementation of instruction selection for
  93     /// the entire function.
  94     ///
  95     bool runOnFunction(Function &Fn) {
  96       // First pass over the function, lower any unknown intrinsic functions
  97       // with the IntrinsicLowering class.
  98       LowerUnknownIntrinsicFunctionCalls(Fn);
  99
 100       F = &MachineFunction::construct(&Fn, TM);
 101
 102       // Create all of the machine basic blocks for the function...
 103       for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
 104         F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I));
 105
 106       BB = &F->front();
 107
 108       // Set up a frame object for the return address.  This is used by the
 109       // llvm.returnaddress & llvm.frameaddress intrinisics.
 110       ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);
 111
 112       // Copy incoming arguments off of the stack...
 113       LoadArgumentsToVirtualRegs(Fn);
 114
 115       // Instruction select everything except PHI nodes
 116       visit(Fn);
 117
 118       // Select the PHI nodes
 119       SelectPHINodes();
 120
 121       // Insert the FP_REG_KILL instructions into blocks that need them.
 122       InsertFPRegKills();
 123
 124       RegMap.clear();
 125       MBBMap.clear();
 126       F = 0;
 127       // We always build a machine code representation for the function
 128       return true;
 129     }
 130
 131     virtual const char *getPassName() const {
 132       return "X86 Simple Instruction Selection";
 133     }
 134
 135     /// visitBasicBlock - This method is called when we are visiting a new basic
 136     /// block.  This simply creates a new MachineBasicBlock to emit code into
 137     /// and adds it to the current MachineFunction.  Subsequent visit* for
 138     /// instructions will be invoked for all instructions in the basic block.
 139     ///
 140     void visitBasicBlock(BasicBlock &LLVM_BB) {
 141       BB = MBBMap[&LLVM_BB];
 142     }
 143
 144     /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
 145     /// function, lowering any calls to unknown intrinsic functions into the
 146     /// equivalent LLVM code.
 147     ///
 148     void LowerUnknownIntrinsicFunctionCalls(Function &F);
 149
 150     /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function
 151     /// from the stack into virtual registers.
 152     ///
 153     void LoadArgumentsToVirtualRegs(Function &F);
 154
 155     /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
 156     /// because we have to generate our sources into the source basic blocks,
 157     /// not the current one.
 158     ///
 159     void SelectPHINodes();
 160
 161     /// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks
 162     /// that need them.  This only occurs due to the floating point stackifier
 163     /// not being aggressive enough to handle arbitrary global stackification.
 164     ///
 165     void InsertFPRegKills();
 166
 167     // Visitation methods for various instructions.  These methods simply emit
 168     // fixed X86 code for each instruction.
 169     //
 170
 171     // Control flow operators
 172     void visitReturnInst(ReturnInst &RI);
 173     void visitBranchInst(BranchInst &BI);
 174
 175     struct ValueRecord {
 176       Value *Val;
 177       unsigned Reg;
 178       const Type *Ty;
 179       ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {}
 180       ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {}
 181     };
 182     void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
 183                 const std::vector<ValueRecord> &Args);
 184     void visitCallInst(CallInst &I);
 185     void visitIntrinsicCall(Intrinsic::ID ID, CallInst &I);
 186
 187     // Arithmetic operators
 188     void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
 189     void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
 190     void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
 191     void visitMul(BinaryOperator &B);
 192
 193     void visitDiv(BinaryOperator &B) { visitDivRem(B); }
 194     void visitRem(BinaryOperator &B) { visitDivRem(B); }
 195     void visitDivRem(BinaryOperator &B);
 196
 197     // Bitwise operators
 198     void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); }
 199     void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); }
 200     void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); }
 201
 202     // Comparison operators...
 203     void visitSetCondInst(SetCondInst &I);
 204     unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
 205                             MachineBasicBlock *MBB,
 206                             MachineBasicBlock::iterator MBBI);
 207     void visitSelectInst(SelectInst &SI);
 208
 209
 210     // Memory Instructions
 211     void visitLoadInst(LoadInst &I);
 212     void visitStoreInst(StoreInst &I);
 213     void visitGetElementPtrInst(GetElementPtrInst &I);
 214     void visitAllocaInst(AllocaInst &I);
 215     void visitMallocInst(MallocInst &I);
 216     void visitFreeInst(FreeInst &I);
 217
 218     // Other operators
 219     void visitShiftInst(ShiftInst &I);
 220     void visitPHINode(PHINode &I) {}      // PHI nodes handled by second pass
 221     void visitCastInst(CastInst &I);
 222     void visitVANextInst(VANextInst &I);
 223     void visitVAArgInst(VAArgInst &I);
 224
 225     void visitInstruction(Instruction &I) {
 226       std::cerr << "Cannot instruction select: " << I;
 227       abort();
 228     }
 229
 230     /// promote32 - Make a value 32-bits wide, and put it somewhere.
 231     ///
 232     void promote32(unsigned targetReg, const ValueRecord &VR);
 233
 234     /// getAddressingMode - Get the addressing mode to use to address the
 235     /// specified value.  The returned value should be used with addFullAddress.
 236     void getAddressingMode(Value *Addr, unsigned &BaseReg, unsigned &Scale,
 237                            unsigned &IndexReg, unsigned &Disp);
 238
 239
 240     /// getGEPIndex - This is used to fold GEP instructions into X86 addressing
 241     /// expressions.
 242     void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
 243                      std::vector<Value*> &GEPOps,
 244                      std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
 245                      unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
 246
 247     /// isGEPFoldable - Return true if the specified GEP can be completely
 248     /// folded into the addressing mode of a load/store or lea instruction.
 249     bool isGEPFoldable(MachineBasicBlock *MBB,
 250                        Value *Src, User::op_iterator IdxBegin,
 251                        User::op_iterator IdxEnd, unsigned &BaseReg,
 252                        unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
 253
 254     /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
 255     /// constant expression GEP support.
 256     ///
 257     void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
 258                           Value *Src, User::op_iterator IdxBegin,
 259                           User::op_iterator IdxEnd, unsigned TargetReg);
 260
 261     /// emitCastOperation - Common code shared between visitCastInst and
 262     /// constant expression cast support.
 263     ///
 264     void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP,
 265                            Value *Src, const Type *DestTy, unsigned TargetReg);
 266
 267     /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
 268     /// and constant expression support.
 269     ///
 270     void emitSimpleBinaryOperation(MachineBasicBlock *BB,
 271                                    MachineBasicBlock::iterator IP,
 272                                    Value *Op0, Value *Op1,
 273                                    unsigned OperatorClass, unsigned TargetReg);
 274
 275     /// emitBinaryFPOperation - This method handles emission of floating point
 276     /// Add (0), Sub (1), Mul (2), and Div (3) operations.
 277     void emitBinaryFPOperation(MachineBasicBlock *BB,
 278                                MachineBasicBlock::iterator IP,
 279                                Value *Op0, Value *Op1,
 280                                unsigned OperatorClass, unsigned TargetReg);
 281
 282     void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
 283                       Value *Op0, Value *Op1, unsigned TargetReg);
 284
 285     void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
 286                     unsigned DestReg, const Type *DestTy,
 287                     unsigned Op0Reg, unsigned Op1Reg);
 288     void doMultiplyConst(MachineBasicBlock *MBB,
 289                          MachineBasicBlock::iterator MBBI,
 290                          unsigned DestReg, const Type *DestTy,
 291                          unsigned Op0Reg, unsigned Op1Val);
 292
 293     void emitDivRemOperation(MachineBasicBlock *BB,
 294                              MachineBasicBlock::iterator IP,
 295                              Value *Op0, Value *Op1, bool isDiv,
 296                              unsigned TargetReg);
 297
 298     /// emitSetCCOperation - Common code shared between visitSetCondInst and
 299     /// constant expression support.
 300     ///
 301     void emitSetCCOperation(MachineBasicBlock *BB,
 302                             MachineBasicBlock::iterator IP,
 303                             Value *Op0, Value *Op1, unsigned Opcode,
 304                             unsigned TargetReg);
 305
 306     /// emitShiftOperation - Common code shared between visitShiftInst and
 307     /// constant expression support.
 308     ///
 309     void emitShiftOperation(MachineBasicBlock *MBB,
 310                             MachineBasicBlock::iterator IP,
 311                             Value *Op, Value *ShiftAmount, bool isLeftShift,
 312                             const Type *ResultTy, unsigned DestReg);
 313
 314     /// emitSelectOperation - Common code shared between visitSelectInst and the
 315     /// constant expression support.
 316     void emitSelectOperation(MachineBasicBlock *MBB,
 317                              MachineBasicBlock::iterator IP,
 318                              Value *Cond, Value *TrueVal, Value *FalseVal,
 319                              unsigned DestReg);
 320
 321     /// copyConstantToRegister - Output the instructions required to put the
 322     /// specified constant into the specified register.
 323     ///
 324     void copyConstantToRegister(MachineBasicBlock *MBB,
 325                                 MachineBasicBlock::iterator MBBI,
 326                                 Constant *C, unsigned Reg);
 327
 328     /// makeAnotherReg - This method returns the next register number we haven't
 329     /// yet used.
 330     ///
 331     /// Long values are handled somewhat specially.  They are always allocated
 332     /// as pairs of 32 bit integer values.  The register number returned is the
 333     /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits
 334     /// of the long value.
 335     ///
 336     unsigned makeAnotherReg(const Type *Ty) {
 337       assert(dynamic_cast<const X86RegisterInfo*>(TM.getRegisterInfo()) &&
 338              "Current target doesn't have X86 reg info??");
 339       const X86RegisterInfo *MRI =
 340         static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
 341       if (Ty == Type::LongTy || Ty == Type::ULongTy) {
 342         const TargetRegisterClass *RC = MRI->getRegClassForType(Type::IntTy);
 343         // Create the lower part
 344         F->getSSARegMap()->createVirtualRegister(RC);
 345         // Create the upper part.
 346         return F->getSSARegMap()->createVirtualRegister(RC)-1;
 347       }
 348
 349       // Add the mapping of regnumber => reg class to MachineFunction
 350       const TargetRegisterClass *RC = MRI->getRegClassForType(Ty);
 351       return F->getSSARegMap()->createVirtualRegister(RC);
 352     }
 353
 354     /// getReg - This method turns an LLVM value into a register number.  This
 355     /// is guaranteed to produce the same register number for a particular value
 356     /// every time it is queried.
 357     ///
 358     unsigned getReg(Value &V) { return getReg(&V); }  // Allow references
 359     unsigned getReg(Value *V) {
 360       // Just append to the end of the current bb.
 361       MachineBasicBlock::iterator It = BB->end();
 362       return getReg(V, BB, It);
 363     }
 364     unsigned getReg(Value *V, MachineBasicBlock *MBB,
 365                     MachineBasicBlock::iterator IPt) {
 366       // If this operand is a constant, emit the code to copy the constant into
 367       // the register here...
 368       //
 369       if (Constant *C = dyn_cast<Constant>(V)) {
 370         unsigned Reg = makeAnotherReg(V->getType());
 371         copyConstantToRegister(MBB, IPt, C, Reg);
 372         return Reg;
 373       } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 374         unsigned Reg = makeAnotherReg(V->getType());
 375         // Move the address of the global into the register
 376         BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV);
 377         return Reg;
 378       } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
 379         // Do not emit noop casts at all.
 380         if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()))
 381           return getReg(CI->getOperand(0), MBB, IPt);
 382       }
 383
 384       unsigned &Reg = RegMap[V];
 385       if (Reg == 0) {
 386         Reg = makeAnotherReg(V->getType());
 387         RegMap[V] = Reg;
 388       }
 389
 390       return Reg;
 391     }
 392   };
 393 }
 394
 395 /// copyConstantToRegister - Output the instructions required to put the
 396 /// specified constant into the specified register.
 397 ///
 398 void ISel::copyConstantToRegister(MachineBasicBlock *MBB,
 399                                   MachineBasicBlock::iterator IP,
 400                                   Constant *C, unsigned R) {
 401   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
 402     unsigned Class = 0;
 403     switch (CE->getOpcode()) {
 404     case Instruction::GetElementPtr:
 405       emitGEPOperation(MBB, IP, CE->getOperand(0),
 406                        CE->op_begin()+1, CE->op_end(), R);
 407       return;
 408     case Instruction::Cast:
 409       emitCastOperation(MBB, IP, CE->getOperand(0), CE->getType(), R);
 410       return;
 411
 412     case Instruction::Xor: ++Class; // FALL THROUGH
 413     case Instruction::Or:  ++Class; // FALL THROUGH
 414     case Instruction::And: ++Class; // FALL THROUGH
 415     case Instruction::Sub: ++Class; // FALL THROUGH
 416     case Instruction::Add:
 417       emitSimpleBinaryOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 418                                 Class, R);
 419       return;
 420
 421     case Instruction::Mul:
 422       emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R);
 423       return;
 424
 425     case Instruction::Div:
 426     case Instruction::Rem:
 427       emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 428                           CE->getOpcode() == Instruction::Div, R);
 429       return;
 430
 431     case Instruction::SetNE:
 432     case Instruction::SetEQ:
 433     case Instruction::SetLT:
 434     case Instruction::SetGT:
 435     case Instruction::SetLE:
 436     case Instruction::SetGE:
 437       emitSetCCOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 438                          CE->getOpcode(), R);
 439       return;
 440
 441     case Instruction::Shl:
 442     case Instruction::Shr:
 443       emitShiftOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 444                          CE->getOpcode() == Instruction::Shl, CE->getType(), R);
 445       return;
 446
 447     case Instruction::Select:
 448       emitSelectOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 449                           CE->getOperand(2), R);
 450       return;
 451
 452     default:
 453       std::cerr << "Offending expr: " << C << "\n";
 454       assert(0 && "Constant expression not yet handled!\n");
 455     }
 456   }
 457
 458   if (C->getType()->isIntegral()) {
 459     unsigned Class = getClassB(C->getType());
 460
 461     if (Class == cLong) {
 462       // Copy the value into the register pair.
 463       uint64_t Val = cast<ConstantInt>(C)->getRawValue();
 464       BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(Val & 0xFFFFFFFF);
 465       BuildMI(*MBB, IP, X86::MOV32ri, 1, R+1).addImm(Val >> 32);
 466       return;
 467     }
 468
 469     assert(Class <= cInt && "Type not handled yet!");
 470
 471     static const unsigned IntegralOpcodeTab[] = {
 472       X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
 473     };
 474
 475     if (C->getType() == Type::BoolTy) {
 476       BuildMI(*MBB, IP, X86::MOV8ri, 1, R).addImm(C == ConstantBool::True);
 477     } else {
 478       ConstantInt *CI = cast<ConstantInt>(C);
 479       BuildMI(*MBB, IP, IntegralOpcodeTab[Class],1,R).addImm(CI->getRawValue());
 480     }
 481   } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
 482     if (CFP->isExactlyValue(+0.0))
 483       BuildMI(*MBB, IP, X86::FLD0, 0, R);
 484     else if (CFP->isExactlyValue(+1.0))
 485       BuildMI(*MBB, IP, X86::FLD1, 0, R);
 486     else {
 487       // Otherwise we need to spill the constant to memory...
 488       MachineConstantPool *CP = F->getConstantPool();
 489       unsigned CPI = CP->getConstantPoolIndex(CFP);
 490       const Type *Ty = CFP->getType();
 491
 492       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
 493       unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m;
 494       addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI);
 495     }
 496
 497   } else if (isa<ConstantPointerNull>(C)) {
 498     // Copy zero (null pointer) to the register.
 499     BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(0);
 500   } else if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(C)) {
 501     BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addGlobalAddress(CPR->getValue());
 502   } else {
 503     std::cerr << "Offending constant: " << C << "\n";
 504     assert(0 && "Type not handled yet!");
 505   }
 506 }
 507
 508 /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from
 509 /// the stack into virtual registers.
 510 ///
 511 void ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
 512   // Emit instructions to load the arguments...  On entry to a function on the
 513   // X86, the stack frame looks like this:
 514   //
 515   // [ESP] -- return address
 516   // [ESP + 4] -- first argument (leftmost lexically)
 517   // [ESP + 8] -- second argument, if first argument is four bytes in size
 518   //    ...
 519   //
 520   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
 521   MachineFrameInfo *MFI = F->getFrameInfo();
 522
 523   for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
 524     bool ArgLive = !I->use_empty();
 525     unsigned Reg = ArgLive ? getReg(*I) : 0;
 526     int FI;          // Frame object index
 527
 528     switch (getClassB(I->getType())) {
 529     case cByte:
 530       if (ArgLive) {
 531         FI = MFI->CreateFixedObject(1, ArgOffset);
 532         addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
 533       }
 534       break;
 535     case cShort:
 536       if (ArgLive) {
 537         FI = MFI->CreateFixedObject(2, ArgOffset);
 538         addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
 539       }
 540       break;
 541     case cInt:
 542       if (ArgLive) {
 543         FI = MFI->CreateFixedObject(4, ArgOffset);
 544         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
 545       }
 546       break;
 547     case cLong:
 548       if (ArgLive) {
 549         FI = MFI->CreateFixedObject(8, ArgOffset);
 550         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
 551         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
 552       }
 553       ArgOffset += 4;   // longs require 4 additional bytes
 554       break;
 555     case cFP:
 556       if (ArgLive) {
 557         unsigned Opcode;
 558         if (I->getType() == Type::FloatTy) {
 559           Opcode = X86::FLD32m;
 560           FI = MFI->CreateFixedObject(4, ArgOffset);
 561         } else {
 562           Opcode = X86::FLD64m;
 563           FI = MFI->CreateFixedObject(8, ArgOffset);
 564         }
 565         addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
 566       }
 567       if (I->getType() == Type::DoubleTy)
 568         ArgOffset += 4;   // doubles require 4 additional bytes
 569       break;
 570     default:
 571       assert(0 && "Unhandled argument type!");
 572     }
 573     ArgOffset += 4;  // Each argument takes at least 4 bytes on the stack...
 574   }
 575
 576   // If the function takes variable number of arguments, add a frame offset for
 577   // the start of the first vararg value... this is used to expand
 578   // llvm.va_start.
 579   if (Fn.getFunctionType()->isVarArg())
 580     VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
 581 }
 582
 583
 584 /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
 585 /// because we have to generate our sources into the source basic blocks, not
 586 /// the current one.
 587 ///
 588 void ISel::SelectPHINodes() {
 589   const TargetInstrInfo &TII = TM.getInstrInfo();
 590   const Function &LF = *F->getFunction();  // The LLVM function...
 591   for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
 592     const BasicBlock *BB = I;
 593     MachineBasicBlock &MBB = *MBBMap[I];
 594
 595     // Loop over all of the PHI nodes in the LLVM basic block...
 596     MachineBasicBlock::iterator PHIInsertPoint = MBB.begin();
 597     for (BasicBlock::const_iterator I = BB->begin();
 598          PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I)); ++I) {
 599
 600       // Create a new machine instr PHI node, and insert it.
 601       unsigned PHIReg = getReg(*PN);
 602       MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint,
 603                                     X86::PHI, PN->getNumOperands(), PHIReg);
 604
 605       MachineInstr *LongPhiMI = 0;
 606       if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy)
 607         LongPhiMI = BuildMI(MBB, PHIInsertPoint,
 608                             X86::PHI, PN->getNumOperands(), PHIReg+1);
 609
 610       // PHIValues - Map of blocks to incoming virtual registers.  We use this
 611       // so that we only initialize one incoming value for a particular block,
 612       // even if the block has multiple entries in the PHI node.
 613       //
 614       std::map<MachineBasicBlock*, unsigned> PHIValues;
 615
 616       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
 617         MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)];
 618         unsigned ValReg;
 619         std::map<MachineBasicBlock*, unsigned>::iterator EntryIt =
 620           PHIValues.lower_bound(PredMBB);
 621
 622         if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) {
 623           // We already inserted an initialization of the register for this
 624           // predecessor.  Recycle it.
 625           ValReg = EntryIt->second;
 626
 627         } else {
 628           // Get the incoming value into a virtual register.
 629           //
 630           Value *Val = PN->getIncomingValue(i);
 631
 632           // If this is a constant or GlobalValue, we may have to insert code
 633           // into the basic block to compute it into a virtual register.
 634           if (isa<Constant>(Val) || isa<GlobalValue>(Val)) {
 635             if (isa<ConstantExpr>(Val)) {
 636               // Because we don't want to clobber any values which might be in
 637               // physical registers with the computation of this constant (which
 638               // might be arbitrarily complex if it is a constant expression),
 639               // just insert the computation at the top of the basic block.
 640               MachineBasicBlock::iterator PI = PredMBB->begin();
 641
 642               // Skip over any PHI nodes though!
 643               while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI)
 644                 ++PI;
 645
 646               ValReg = getReg(Val, PredMBB, PI);
 647             } else {
 648               // Simple constants get emitted at the end of the basic block,
 649               // before any terminator instructions.  We "know" that the code to
 650               // move a constant into a register will never clobber any flags.
 651               ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator());
 652             }
 653           } else {
 654             ValReg = getReg(Val);
 655           }
 656
 657           // Remember that we inserted a value for this PHI for this predecessor
 658           PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg));
 659         }
 660
 661         PhiMI->addRegOperand(ValReg);
 662         PhiMI->addMachineBasicBlockOperand(PredMBB);
 663         if (LongPhiMI) {
 664           LongPhiMI->addRegOperand(ValReg+1);
 665           LongPhiMI->addMachineBasicBlockOperand(PredMBB);
 666         }
 667       }
 668
 669       // Now that we emitted all of the incoming values for the PHI node, make
 670       // sure to reposition the InsertPoint after the PHI that we just added.
 671       // This is needed because we might have inserted a constant into this
 672       // block, right after the PHI's which is before the old insert point!
 673       PHIInsertPoint = LongPhiMI ? LongPhiMI : PhiMI;
 674       ++PHIInsertPoint;
 675     }
 676   }
 677 }
 678
 679 /// RequiresFPRegKill - The floating point stackifier pass cannot insert
 680 /// compensation code on critical edges.  As such, it requires that we kill all
 681 /// FP registers on the exit from any blocks that either ARE critical edges, or
 682 /// branch to a block that has incoming critical edges.
 683 ///
 684 /// Note that this kill instruction will eventually be eliminated when
 685 /// restrictions in the stackifier are relaxed.
 686 ///
 687 static bool RequiresFPRegKill(const BasicBlock *BB) {
 688 #if 0
 689   for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
 690     const BasicBlock *Succ = *SI;
 691     pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
 692     ++PI;  // Block have at least one predecessory
 693     if (PI != PE) {             // If it has exactly one, this isn't crit edge
 694       // If this block has more than one predecessor, check all of the
 695       // predecessors to see if they have multiple successors.  If so, then the
 696       // block we are analyzing needs an FPRegKill.
 697       for (PI = pred_begin(Succ); PI != PE; ++PI) {
 698         const BasicBlock *Pred = *PI;
 699         succ_const_iterator SI2 = succ_begin(Pred);
 700         ++SI2;  // There must be at least one successor of this block.
 701         if (SI2 != succ_end(Pred))
 702           return true;   // Yes, we must insert the kill on this edge.
 703       }
 704     }
 705   }
 706   // If we got this far, there is no need to insert the kill instruction.
 707   return false;
 708 #else
 709   return true;
 710 #endif
 711 }
 712
 713 // InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks that
 714 // need them.  This only occurs due to the floating point stackifier not being
 715 // aggressive enough to handle arbitrary global stackification.
 716 //
 717 // Currently we insert an FP_REG_KILL instruction into each block that uses or
 718 // defines a floating point virtual register.
 719 //
 720 // When the global register allocators (like linear scan) finally update live
 721 // variable analysis, we can keep floating point values in registers across
 722 // portions of the CFG that do not involve critical edges.  This will be a big
 723 // win, but we are waiting on the global allocators before we can do this.
 724 //
 725 // With a bit of work, the floating point stackifier pass can be enhanced to
 726 // break critical edges as needed (to make a place to put compensation code),
 727 // but this will require some infrastructure improvements as well.
 728 //
 729 void ISel::InsertFPRegKills() {
 730   SSARegMap &RegMap = *F->getSSARegMap();
 731
 732   for (MachineFunction::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
 733     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
 734       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
 735       MachineOperand& MO = I->getOperand(i);
 736         if (MO.isRegister() && MO.getReg()) {
 737           unsigned Reg = MO.getReg();
 738           if (MRegisterInfo::isVirtualRegister(Reg))
 739             if (RegMap.getRegClass(Reg)->getSize() == 10)
 740               goto UsesFPReg;
 741         }
 742       }
 743     // If we haven't found an FP register use or def in this basic block, check
 744     // to see if any of our successors has an FP PHI node, which will cause a
 745     // copy to be inserted into this block.
 746     for (succ_const_iterator SI = succ_begin(BB->getBasicBlock()),
 747            E = succ_end(BB->getBasicBlock()); SI != E; ++SI) {
 748       MachineBasicBlock *SBB = MBBMap[*SI];
 749       for (MachineBasicBlock::iterator I = SBB->begin();
 750            I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
 751         if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10)
 752           goto UsesFPReg;
 753       }
 754     }
 755     continue;
 756   UsesFPReg:
 757     // Okay, this block uses an FP register.  If the block has successors (ie,
 758     // it's not an unwind/return), insert the FP_REG_KILL instruction.
 759     if (BB->getBasicBlock()->getTerminator()->getNumSuccessors() &&
 760         RequiresFPRegKill(BB->getBasicBlock())) {
 761       BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
 762       ++NumFPKill;
 763     }
 764   }
 765 }
 766
 767
 768 // canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold
 769 // it into the conditional branch or select instruction which is the only user
 770 // of the cc instruction.  This is the case if the conditional branch is the
 771 // only user of the setcc, and if the setcc is in the same basic block as the
 772 // conditional branch.  We also don't handle long arguments below, so we reject
 773 // them here as well.
 774 //
 775 static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
 776   if (SetCondInst *SCI = dyn_cast<SetCondInst>(V))
 777     if (SCI->hasOneUse()) {
 778       Instruction *User = cast<Instruction>(SCI->use_back());
 779       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
 780           SCI->getParent() == User->getParent() &&
 781           (getClassB(SCI->getOperand(0)->getType()) != cLong ||
 782            SCI->getOpcode() == Instruction::SetEQ ||
 783            SCI->getOpcode() == Instruction::SetNE))
 784         return SCI;
 785     }
 786   return 0;
 787 }
 788
 789 // Return a fixed numbering for setcc instructions which does not depend on the
 790 // order of the opcodes.
 791 //
 792 static unsigned getSetCCNumber(unsigned Opcode) {
 793   switch(Opcode) {
 794   default: assert(0 && "Unknown setcc instruction!");
 795   case Instruction::SetEQ: return 0;
 796   case Instruction::SetNE: return 1;
 797   case Instruction::SetLT: return 2;
 798   case Instruction::SetGE: return 3;
 799   case Instruction::SetGT: return 4;
 800   case Instruction::SetLE: return 5;
 801   }
 802 }
 803
 804 // LLVM  -> X86 signed  X86 unsigned
 805 // -----    ----------  ------------
 806 // seteq -> sete        sete
 807 // setne -> setne       setne
 808 // setlt -> setl        setb
 809 // setge -> setge       setae
 810 // setgt -> setg        seta
 811 // setle -> setle       setbe
 812 // ----
 813 //          sets                       // Used by comparison with 0 optimization
 814 //          setns
 815 static const unsigned SetCCOpcodeTab[2][8] = {
 816   { X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAEr, X86::SETAr, X86::SETBEr,
 817     0, 0 },
 818   { X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGEr, X86::SETGr, X86::SETLEr,
 819     X86::SETSr, X86::SETNSr },
 820 };
 821
 822 // EmitComparison - This function emits a comparison of the two operands,
 823 // returning the extended setcc code to use.
 824 unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
 825                               MachineBasicBlock *MBB,
 826                               MachineBasicBlock::iterator IP) {
 827   // The arguments are already supposed to be of the same type.
 828   const Type *CompTy = Op0->getType();
 829   unsigned Class = getClassB(CompTy);
 830   unsigned Op0r = getReg(Op0, MBB, IP);
 831
 832   // Special case handling of: cmp R, i
 833   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
 834     if (Class == cByte || Class == cShort || Class == cInt) {
 835       unsigned Op1v = CI->getRawValue();
 836
 837       // Mask off any upper bits of the constant, if there are any...
 838       Op1v &= (1ULL << (8 << Class)) - 1;
 839
 840       // If this is a comparison against zero, emit more efficient code.  We
 841       // can't handle unsigned comparisons against zero unless they are == or
 842       // !=.  These should have been strength reduced already anyway.
 843       if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {
 844         static const unsigned TESTTab[] = {
 845           X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
 846         };
 847         BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r);
 848
 849         if (OpNum == 2) return 6;   // Map jl -> js
 850         if (OpNum == 3) return 7;   // Map jg -> jns
 851         return OpNum;
 852       }
 853
 854       static const unsigned CMPTab[] = {
 855         X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
 856       };
 857
 858       BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
 859       return OpNum;
 860     } else {
 861       assert(Class == cLong && "Unknown integer class!");
 862       unsigned LowCst = CI->getRawValue();
 863       unsigned HiCst = CI->getRawValue() >> 32;
 864       if (OpNum < 2) {    // seteq, setne
 865         unsigned LoTmp = Op0r;
 866         if (LowCst != 0) {
 867           LoTmp = makeAnotherReg(Type::IntTy);
 868           BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst);
 869         }
 870         unsigned HiTmp = Op0r+1;
 871         if (HiCst != 0) {
 872           HiTmp = makeAnotherReg(Type::IntTy);
 873           BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
 874         }
 875         unsigned FinalTmp = makeAnotherReg(Type::IntTy);
 876         BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
 877         return OpNum;
 878       } else {
 879         // Emit a sequence of code which compares the high and low parts once
 880         // each, then uses a conditional move to handle the overflow case.  For
 881         // example, a setlt for long would generate code like this:
 882         //
 883         // AL = lo(op1) < lo(op2)   // Signedness depends on operands
 884         // BL = hi(op1) < hi(op2)   // Always unsigned comparison
 885         // dest = hi(op1) == hi(op2) ? AL : BL;
 886         //
 887
 888         // FIXME: This would be much better if we had hierarchical register
 889         // classes!  Until then, hardcode registers so that we can deal with
 890         // their aliases (because we don't have conditional byte moves).
 891         //
 892         BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
 893         BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
 894         BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
 895         BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
 896         BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
 897         BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
 898         BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
 899           .addReg(X86::AX);
 900         // NOTE: visitSetCondInst knows that the value is dumped into the BL
 901         // register at this point for long values...
 902         return OpNum;
 903       }
 904     }
 905   }
 906
 907   // Special case handling of comparison against +/- 0.0
 908   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
 909     if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
 910       BuildMI(*MBB, IP, X86::FTST, 1).addReg(Op0r);
 911       BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
 912       BuildMI(*MBB, IP, X86::SAHF, 1);
 913       return OpNum;
 914     }
 915
 916   unsigned Op1r = getReg(Op1, MBB, IP);
 917   switch (Class) {
 918   default: assert(0 && "Unknown type class!");
 919     // Emit: cmp <var1>, <var2> (do the comparison).  We can
 920     // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
 921     // 32-bit.
 922   case cByte:
 923     BuildMI(*MBB, IP, X86::CMP8rr, 2).addReg(Op0r).addReg(Op1r);
 924     break;
 925   case cShort:
 926     BuildMI(*MBB, IP, X86::CMP16rr, 2).addReg(Op0r).addReg(Op1r);
 927     break;
 928   case cInt:
 929     BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
 930     break;
 931   case cFP:
 932     if (0) { // for processors prior to the P6
 933       BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r);
 934       BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
 935       BuildMI(*MBB, IP, X86::SAHF, 1);
 936     } else {
 937       BuildMI(*MBB, IP, X86::FpUCOMI, 2).addReg(Op0r).addReg(Op1r);
 938     }
 939     break;
 940
 941   case cLong:
 942     if (OpNum < 2) {    // seteq, setne
 943       unsigned LoTmp = makeAnotherReg(Type::IntTy);
 944       unsigned HiTmp = makeAnotherReg(Type::IntTy);
 945       unsigned FinalTmp = makeAnotherReg(Type::IntTy);
 946       BuildMI(*MBB, IP, X86::XOR32rr, 2, LoTmp).addReg(Op0r).addReg(Op1r);
 947       BuildMI(*MBB, IP, X86::XOR32rr, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1);
 948       BuildMI(*MBB, IP, X86::OR32rr,  2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
 949       break;  // Allow the sete or setne to be generated from flags set by OR
 950     } else {
 951       // Emit a sequence of code which compares the high and low parts once
 952       // each, then uses a conditional move to handle the overflow case.  For
 953       // example, a setlt for long would generate code like this:
 954       //
 955       // AL = lo(op1) < lo(op2)   // Signedness depends on operands
 956       // BL = hi(op1) < hi(op2)   // Always unsigned comparison
 957       // dest = hi(op1) == hi(op2) ? AL : BL;
 958       //
 959
 960       // FIXME: This would be much better if we had hierarchical register
 961       // classes!  Until then, hardcode registers so that we can deal with their
 962       // aliases (because we don't have conditional byte moves).
 963       //
 964       BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
 965       BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
 966       BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r+1).addReg(Op1r+1);
 967       BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0, X86::BL);
 968       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
 969       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
 970       BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
 971                                                    .addReg(X86::AX);
 972       // NOTE: visitSetCondInst knows that the value is dumped into the BL
 973       // register at this point for long values...
 974       return OpNum;
 975     }
 976   }
 977   return OpNum;
 978 }
 979
 980 /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
 981 /// register, then move it to wherever the result should be.
 982 ///
 983 void ISel::visitSetCondInst(SetCondInst &I) {
 984   if (canFoldSetCCIntoBranchOrSelect(&I))
 985     return;  // Fold this into a branch or select.
 986
 987   unsigned DestReg = getReg(I);
 988   MachineBasicBlock::iterator MII = BB->end();
 989   emitSetCCOperation(BB, MII, I.getOperand(0), I.getOperand(1), I.getOpcode(),
 990                      DestReg);
 991 }
 992
 993 /// emitSetCCOperation - Common code shared between visitSetCondInst and
 994 /// constant expression support.
 995 ///
 996 void ISel::emitSetCCOperation(MachineBasicBlock *MBB,
 997                               MachineBasicBlock::iterator IP,
 998                               Value *Op0, Value *Op1, unsigned Opcode,
 999                               unsigned TargetReg) {
1000   unsigned OpNum = getSetCCNumber(Opcode);
1001   OpNum = EmitComparison(OpNum, Op0, Op1, MBB, IP);
1002
1003   const Type *CompTy = Op0->getType();
1004   unsigned CompClass = getClassB(CompTy);
1005   bool isSigned = CompTy->isSigned() && CompClass != cFP;
1006
1007   if (CompClass != cLong || OpNum < 2) {
1008     // Handle normal comparisons with a setcc instruction...
1009     BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
1010   } else {
1011     // Handle long comparisons by copying the value which is already in BL into
1012     // the register we want...
1013     BuildMI(*MBB, IP, X86::MOV8rr, 1, TargetReg).addReg(X86::BL);
1014   }
1015 }
1016
1017 void ISel::visitSelectInst(SelectInst &SI) {
1018   unsigned DestReg = getReg(SI);
1019   MachineBasicBlock::iterator MII = BB->end();
1020   emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(),
1021                       SI.getFalseValue(), DestReg);
1022 }
1023
1024 /// emitSelect - Common code shared between visitSelectInst and the constant
1025 /// expression support.
1026 void ISel::emitSelectOperation(MachineBasicBlock *MBB,
1027                                MachineBasicBlock::iterator IP,
1028                                Value *Cond, Value *TrueVal, Value *FalseVal,
1029                                unsigned DestReg) {
1030   unsigned SelectClass = getClassB(TrueVal->getType());
1031
1032   // We don't support 8-bit conditional moves.  If we have incoming constants,
1033   // transform them into 16-bit constants to avoid having a run-time conversion.
1034   if (SelectClass == cByte) {
1035     if (Constant *T = dyn_cast<Constant>(TrueVal))
1036       TrueVal = ConstantExpr::getCast(T, Type::ShortTy);
1037     if (Constant *F = dyn_cast<Constant>(FalseVal))
1038       FalseVal = ConstantExpr::getCast(F, Type::ShortTy);
1039   }
1040
1041
1042   unsigned Opcode;
1043   if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) {
1044     // We successfully folded the setcc into the select instruction.
1045
1046     unsigned OpNum = getSetCCNumber(SCI->getOpcode());
1047     OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), MBB,
1048                            IP);
1049
1050     const Type *CompTy = SCI->getOperand(0)->getType();
1051     bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
1052
1053     // LLVM  -> X86 signed  X86 unsigned
1054     // -----    ----------  ------------
1055     // seteq -> cmovNE      cmovNE
1056     // setne -> cmovE       cmovE
1057     // setlt -> cmovGE      cmovAE
1058     // setge -> cmovL       cmovB
1059     // setgt -> cmovLE      cmovBE
1060     // setle -> cmovG       cmovA
1061     // ----
1062     //          cmovNS              // Used by comparison with 0 optimization
1063     //          cmovS
1064
1065     switch (SelectClass) {
1066     default: assert(0 && "Unknown value class!");
1067     case cFP: {
1068       // Annoyingly, we don't have a full set of floating point conditional
1069       // moves.  :(
1070       static const unsigned OpcodeTab[2][8] = {
1071         { X86::FCMOVNE, X86::FCMOVE, X86::FCMOVAE, X86::FCMOVB,
1072           X86::FCMOVBE, X86::FCMOVA, 0, 0 },
1073         { X86::FCMOVNE, X86::FCMOVE, 0, 0, 0, 0, 0, 0 },
1074       };
1075       Opcode = OpcodeTab[isSigned][OpNum];
1076
1077       // If opcode == 0, we hit a case that we don't support.  Output a setcc
1078       // and compare the result against zero.
1079       if (Opcode == 0) {
1080         unsigned CompClass = getClassB(CompTy);
1081         unsigned CondReg;
1082         if (CompClass != cLong || OpNum < 2) {
1083           CondReg = makeAnotherReg(Type::BoolTy);
1084           // Handle normal comparisons with a setcc instruction...
1085           BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, CondReg);
1086         } else {
1087           // Long comparisons end up in the BL register.
1088           CondReg = X86::BL;
1089         }
1090
1091         BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
1092         Opcode = X86::FCMOVE;
1093       }
1094       break;
1095     }
1096     case cByte:
1097     case cShort: {
1098       static const unsigned OpcodeTab[2][8] = {
1099         { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVAE16rr, X86::CMOVB16rr,
1100           X86::CMOVBE16rr, X86::CMOVA16rr, 0, 0 },
1101         { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVGE16rr, X86::CMOVL16rr,
1102           X86::CMOVLE16rr, X86::CMOVG16rr, X86::CMOVNS16rr, X86::CMOVS16rr },
1103       };
1104       Opcode = OpcodeTab[isSigned][OpNum];
1105       break;
1106     }
1107     case cInt:
1108     case cLong: {
1109       static const unsigned OpcodeTab[2][8] = {
1110         { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVAE32rr, X86::CMOVB32rr,
1111           X86::CMOVBE32rr, X86::CMOVA32rr, 0, 0 },
1112         { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVGE32rr, X86::CMOVL32rr,
1113           X86::CMOVLE32rr, X86::CMOVG32rr, X86::CMOVNS32rr, X86::CMOVS32rr },
1114       };
1115       Opcode = OpcodeTab[isSigned][OpNum];
1116       break;
1117     }
1118     }
1119   } else {
1120     // Get the value being branched on, and use it to set the condition codes.
1121     unsigned CondReg = getReg(Cond, MBB, IP);
1122     BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
1123     switch (SelectClass) {
1124     default: assert(0 && "Unknown value class!");
1125     case cFP:    Opcode = X86::FCMOVE; break;
1126     case cByte:
1127     case cShort: Opcode = X86::CMOVE16rr; break;
1128     case cInt:
1129     case cLong:  Opcode = X86::CMOVE32rr; break;
1130     }
1131   }
1132
1133   unsigned TrueReg  = getReg(TrueVal, MBB, IP);
1134   unsigned FalseReg = getReg(FalseVal, MBB, IP);
1135   unsigned RealDestReg = DestReg;
1136
1137
1138   // Annoyingly enough, X86 doesn't HAVE 8-bit conditional moves.  Because of
1139   // this, we have to promote the incoming values to 16 bits, perform a 16-bit
1140   // cmove, then truncate the result.
1141   if (SelectClass == cByte) {
1142     DestReg = makeAnotherReg(Type::ShortTy);
1143     if (getClassB(TrueVal->getType()) == cByte) {
1144       // Promote the true value, by storing it into AL, and reading from AX.
1145       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::AL).addReg(TrueReg);
1146       BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::AH).addImm(0);
1147       TrueReg = makeAnotherReg(Type::ShortTy);
1148       BuildMI(*MBB, IP, X86::MOV16rr, 1, TrueReg).addReg(X86::AX);
1149     }
1150     if (getClassB(FalseVal->getType()) == cByte) {
1151       // Promote the true value, by storing it into CL, and reading from CX.
1152       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(FalseReg);
1153       BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::CH).addImm(0);
1154       FalseReg = makeAnotherReg(Type::ShortTy);
1155       BuildMI(*MBB, IP, X86::MOV16rr, 1, FalseReg).addReg(X86::CX);
1156     }
1157   }
1158
1159   BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(TrueReg).addReg(FalseReg);
1160
1161   switch (SelectClass) {
1162   case cByte:
1163     // We did the computation with 16-bit registers.  Truncate back to our
1164     // result by copying into AX then copying out AL.
1165     BuildMI(*MBB, IP, X86::MOV16rr, 1, X86::AX).addReg(DestReg);
1166     BuildMI(*MBB, IP, X86::MOV8rr, 1, RealDestReg).addReg(X86::AL);
1167     break;
1168   case cLong:
1169     // Move the upper half of the value as well.
1170     BuildMI(*MBB, IP, Opcode, 2,DestReg+1).addReg(TrueReg+1).addReg(FalseReg+1);
1171     break;
1172   }
1173 }
1174
1175
1176
1177 /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
1178 /// operand, in the specified target register.
1179 ///
1180 void ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
1181   bool isUnsigned = VR.Ty->isUnsigned();
1182
1183   Value *Val = VR.Val;
1184   const Type *Ty = VR.Ty;
1185   if (Val) {
1186     if (Constant *C = dyn_cast<Constant>(Val)) {
1187       Val = ConstantExpr::getCast(C, Type::IntTy);
1188       Ty = Type::IntTy;
1189     }
1190
1191     // If this is a simple constant, just emit a MOVri directly to avoid the
1192     // copy.
1193     if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
1194       int TheVal = CI->getRawValue() & 0xFFFFFFFF;
1195     BuildMI(BB, X86::MOV32ri, 1, targetReg).addImm(TheVal);
1196       return;
1197     }
1198   }
1199
1200   // Make sure we have the register number for this value...
1201   unsigned Reg = Val ? getReg(Val) : VR.Reg;
1202
1203   switch (getClassB(Ty)) {
1204   case cByte:
1205     // Extend value into target register (8->32)
1206     if (isUnsigned)
1207       BuildMI(BB, X86::MOVZX32rr8, 1, targetReg).addReg(Reg);
1208     else
1209       BuildMI(BB, X86::MOVSX32rr8, 1, targetReg).addReg(Reg);
1210     break;
1211   case cShort:
1212     // Extend value into target register (16->32)
1213     if (isUnsigned)
1214       BuildMI(BB, X86::MOVZX32rr16, 1, targetReg).addReg(Reg);
1215     else
1216       BuildMI(BB, X86::MOVSX32rr16, 1, targetReg).addReg(Reg);
1217     break;
1218   case cInt:
1219     // Move value into target register (32->32)
1220     BuildMI(BB, X86::MOV32rr, 1, targetReg).addReg(Reg);
1221     break;
1222   default:
1223     assert(0 && "Unpromotable operand class in promote32");
1224   }
1225 }
1226
1227 /// 'ret' instruction - Here we are interested in meeting the x86 ABI.  As such,
1228 /// we have the following possibilities:
1229 ///
1230 ///   ret void: No return value, simply emit a 'ret' instruction
1231 ///   ret sbyte, ubyte : Extend value into EAX and return
1232 ///   ret short, ushort: Extend value into EAX and return
1233 ///   ret int, uint    : Move value into EAX and return
1234 ///   ret pointer      : Move value into EAX and return
1235 ///   ret long, ulong  : Move value into EAX/EDX and return
1236 ///   ret float/double : Top of FP stack
1237 ///
1238 void ISel::visitReturnInst(ReturnInst &I) {
1239   if (I.getNumOperands() == 0) {
1240     BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
1241     return;
1242   }
1243
1244   Value *RetVal = I.getOperand(0);
1245   switch (getClassB(RetVal->getType())) {
1246   case cByte:   // integral return values: extend or move into EAX and return
1247   case cShort:
1248   case cInt:
1249     promote32(X86::EAX, ValueRecord(RetVal));
1250     // Declare that EAX is live on exit
1251     BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP);
1252     break;
1253   case cFP: {                  // Floats & Doubles: Return in ST(0)
1254     unsigned RetReg = getReg(RetVal);
1255     BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg);
1256     // Declare that top-of-stack is live on exit
1257     BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP);
1258     break;
1259   }
1260   case cLong: {
1261     unsigned RetReg = getReg(RetVal);
1262     BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg);
1263     BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1);
1264     // Declare that EAX & EDX are live on exit
1265     BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX)
1266       .addReg(X86::ESP);
1267     break;
1268   }
1269   default:
1270     visitInstruction(I);
1271   }
1272   // Emit a 'ret' instruction
1273   BuildMI(BB, X86::RET, 0);
1274 }
1275
1276 // getBlockAfter - Return the basic block which occurs lexically after the
1277 // specified one.
1278 static inline BasicBlock *getBlockAfter(BasicBlock *BB) {
1279   Function::iterator I = BB; ++I;  // Get iterator to next block
1280   return I != BB->getParent()->end() ? &*I : 0;
1281 }
1282
1283 /// visitBranchInst - Handle conditional and unconditional branches here.  Note
1284 /// that since code layout is frozen at this point, that if we are trying to
1285 /// jump to a block that is the immediate successor of the current block, we can
1286 /// just make a fall-through (but we don't currently).
1287 ///
1288 void ISel::visitBranchInst(BranchInst &BI) {
1289   BasicBlock *NextBB = getBlockAfter(BI.getParent());  // BB after current one
1290
1291   if (!BI.isConditional()) {  // Unconditional branch?
1292     if (BI.getSuccessor(0) != NextBB)
1293       BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
1294     return;
1295   }
1296
1297   // See if we can fold the setcc into the branch itself...
1298   SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(BI.getCondition());
1299   if (SCI == 0) {
1300     // Nope, cannot fold setcc into this branch.  Emit a branch on a condition
1301     // computed some other way...
1302     unsigned condReg = getReg(BI.getCondition());
1303     BuildMI(BB, X86::TEST8rr, 2).addReg(condReg).addReg(condReg);
1304     if (BI.getSuccessor(1) == NextBB) {
1305       if (BI.getSuccessor(0) != NextBB)
1306         BuildMI(BB, X86::JNE, 1).addPCDisp(BI.getSuccessor(0));
1307     } else {
1308       BuildMI(BB, X86::JE, 1).addPCDisp(BI.getSuccessor(1));
1309
1310       if (BI.getSuccessor(0) != NextBB)
1311         BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(0));
1312     }
1313     return;
1314   }
1315
1316   unsigned OpNum = getSetCCNumber(SCI->getOpcode());
1317   MachineBasicBlock::iterator MII = BB->end();
1318   OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), BB,MII);
1319
1320   const Type *CompTy = SCI->getOperand(0)->getType();
1321   bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
1322
1323
1324   // LLVM  -> X86 signed  X86 unsigned
1325   // -----    ----------  ------------
1326   // seteq -> je          je
1327   // setne -> jne         jne
1328   // setlt -> jl          jb
1329   // setge -> jge         jae
1330   // setgt -> jg          ja
1331   // setle -> jle         jbe
1332   // ----
1333   //          js                  // Used by comparison with 0 optimization
1334   //          jns
1335
1336   static const unsigned OpcodeTab[2][8] = {
1337     { X86::JE, X86::JNE, X86::JB, X86::JAE, X86::JA, X86::JBE, 0, 0 },
1338     { X86::JE, X86::JNE, X86::JL, X86::JGE, X86::JG, X86::JLE,
1339       X86::JS, X86::JNS },
1340   };
1341
1342   if (BI.getSuccessor(0) != NextBB) {
1343     BuildMI(BB, OpcodeTab[isSigned][OpNum], 1).addPCDisp(BI.getSuccessor(0));
1344     if (BI.getSuccessor(1) != NextBB)
1345       BuildMI(BB, X86::JMP, 1).addPCDisp(BI.getSuccessor(1));
1346   } else {
1347     // Change to the inverse condition...
1348     if (BI.getSuccessor(1) != NextBB) {
1349       OpNum ^= 1;
1350       BuildMI(BB, OpcodeTab[isSigned][OpNum], 1).addPCDisp(BI.getSuccessor(1));
1351     }
1352   }
1353 }
1354
1355
1356 /// doCall - This emits an abstract call instruction, setting up the arguments
1357 /// and the return value as appropriate.  For the actual function call itself,
1358 /// it inserts the specified CallMI instruction into the stream.
1359 ///
1360 void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
1361                   const std::vector<ValueRecord> &Args) {
1362
1363   // Count how many bytes are to be pushed on the stack...
1364   unsigned NumBytes = 0;
1365
1366   if (!Args.empty()) {
1367     for (unsigned i = 0, e = Args.size(); i != e; ++i)
1368       switch (getClassB(Args[i].Ty)) {
1369       case cByte: case cShort: case cInt:
1370         NumBytes += 4; break;
1371       case cLong:
1372         NumBytes += 8; break;
1373       case cFP:
1374         NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8;
1375         break;
1376       default: assert(0 && "Unknown class!");
1377       }
1378
1379     // Adjust the stack pointer for the new arguments...
1380     BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(NumBytes);
1381
1382     // Arguments go on the stack in reverse order, as specified by the ABI.
1383     unsigned ArgOffset = 0;
1384     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1385       unsigned ArgReg;
1386       switch (getClassB(Args[i].Ty)) {
1387       case cByte:
1388       case cShort:
1389         if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
1390           // Zero/Sign extend constant, then stuff into memory.
1391           ConstantInt *Val = cast<ConstantInt>(Args[i].Val);
1392           Val = cast<ConstantInt>(ConstantExpr::getCast(Val, Type::IntTy));
1393           addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
1394             .addImm(Val->getRawValue() & 0xFFFFFFFF);
1395         } else {
1396           // Promote arg to 32 bits wide into a temporary register...
1397           ArgReg = makeAnotherReg(Type::UIntTy);
1398           promote32(ArgReg, Args[i]);
1399           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1400                        X86::ESP, ArgOffset).addReg(ArgReg);
1401         }
1402         break;
1403       case cInt:
1404         if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
1405           unsigned Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
1406           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1407                        X86::ESP, ArgOffset).addImm(Val);
1408         } else {
1409           ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
1410           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1411                        X86::ESP, ArgOffset).addReg(ArgReg);
1412         }
1413         break;
1414       case cLong:
1415         if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
1416           uint64_t Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
1417           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1418                        X86::ESP, ArgOffset).addImm(Val & ~0U);
1419           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1420                        X86::ESP, ArgOffset+4).addImm(Val >> 32ULL);
1421         } else {
1422           ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
1423           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1424                        X86::ESP, ArgOffset).addReg(ArgReg);
1425           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1426                        X86::ESP, ArgOffset+4).addReg(ArgReg+1);
1427         }
1428         ArgOffset += 4;        // 8 byte entry, not 4.
1429         break;
1430
1431       case cFP:
1432         ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
1433         if (Args[i].Ty == Type::FloatTy) {
1434           addRegOffset(BuildMI(BB, X86::FST32m, 5),
1435                        X86::ESP, ArgOffset).addReg(ArgReg);
1436         } else {
1437           assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
1438           addRegOffset(BuildMI(BB, X86::FST64m, 5),
1439                        X86::ESP, ArgOffset).addReg(ArgReg);
1440           ArgOffset += 4;       // 8 byte entry, not 4.
1441         }
1442         break;
1443
1444       default: assert(0 && "Unknown class!");
1445       }
1446       ArgOffset += 4;
1447     }
1448   } else {
1449     BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(0);
1450   }
1451
1452   BB->push_back(CallMI);
1453
1454   BuildMI(BB, X86::ADJCALLSTACKUP, 1).addImm(NumBytes);
1455
1456   // If there is a return value, scavenge the result from the location the call
1457   // leaves it in...
1458   //
1459   if (Ret.Ty != Type::VoidTy) {
1460     unsigned DestClass = getClassB(Ret.Ty);
1461     switch (DestClass) {
1462     case cByte:
1463     case cShort:
1464     case cInt: {
1465       // Integral results are in %eax, or the appropriate portion
1466       // thereof.
1467       static const unsigned regRegMove[] = {
1468         X86::MOV8rr, X86::MOV16rr, X86::MOV32rr
1469       };
1470       static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
1471       BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]);
1472       break;
1473     }
1474     case cFP:     // Floating-point return values live in %ST(0)
1475       BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg);
1476       break;
1477     case cLong:   // Long values are left in EDX:EAX
1478       BuildMI(BB, X86::MOV32rr, 1, Ret.Reg).addReg(X86::EAX);
1479       BuildMI(BB, X86::MOV32rr, 1, Ret.Reg+1).addReg(X86::EDX);
1480       break;
1481     default: assert(0 && "Unknown class!");
1482     }
1483   }
1484 }
1485
1486
1487 /// visitCallInst - Push args on stack and do a procedure call instruction.
1488 void ISel::visitCallInst(CallInst &CI) {
1489   MachineInstr *TheCall;
1490   if (Function *F = CI.getCalledFunction()) {
1491     // Is it an intrinsic function call?
1492     if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) {
1493       visitIntrinsicCall(ID, CI);   // Special intrinsics are not handled here
1494       return;
1495     }
1496
1497     // Emit a CALL instruction with PC-relative displacement.
1498     TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true);
1499   } else {  // Emit an indirect call...
1500     unsigned Reg = getReg(CI.getCalledValue());
1501     TheCall = BuildMI(X86::CALL32r, 1).addReg(Reg);
1502   }
1503
1504   std::vector<ValueRecord> Args;
1505   for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
1506     Args.push_back(ValueRecord(CI.getOperand(i)));
1507
1508   unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0;
1509   doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args);
1510 }
1511
1512
1513 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
1514 /// function, lowering any calls to unknown intrinsic functions into the
1515 /// equivalent LLVM code.
1516 ///
1517 void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
1518   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
1519     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
1520       if (CallInst *CI = dyn_cast<CallInst>(I++))
1521         if (Function *F = CI->getCalledFunction())
1522           switch (F->getIntrinsicID()) {
1523           case Intrinsic::not_intrinsic:
1524           case Intrinsic::vastart:
1525           case Intrinsic::vacopy:
1526           case Intrinsic::vaend:
1527           case Intrinsic::returnaddress:
1528           case Intrinsic::frameaddress:
1529           case Intrinsic::memcpy:
1530           case Intrinsic::memset:
1531           case Intrinsic::readport:
1532           case Intrinsic::writeport:
1533             // We directly implement these intrinsics
1534             break;
1535           default:
1536             // All other intrinsic calls we must lower.
1537             Instruction *Before = CI->getPrev();
1538             TM.getIntrinsicLowering().LowerIntrinsicCall(CI);
1539             if (Before) {        // Move iterator to instruction after call
1540               I = Before;  ++I;
1541             } else {
1542               I = BB->begin();
1543             }
1544           }
1545
1546 }
1547
1548 void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
1549   unsigned TmpReg1, TmpReg2;
1550   switch (ID) {
1551   case Intrinsic::vastart:
1552     // Get the address of the first vararg value...
1553     TmpReg1 = getReg(CI);
1554     addFrameReference(BuildMI(BB, X86::LEA32r, 5, TmpReg1), VarArgsFrameIndex);
1555     return;
1556
1557   case Intrinsic::vacopy:
1558     TmpReg1 = getReg(CI);
1559     TmpReg2 = getReg(CI.getOperand(1));
1560     BuildMI(BB, X86::MOV32rr, 1, TmpReg1).addReg(TmpReg2);
1561     return;
1562   case Intrinsic::vaend: return;   // Noop on X86
1563
1564   case Intrinsic::returnaddress:
1565   case Intrinsic::frameaddress:
1566     TmpReg1 = getReg(CI);
1567     if (cast<Constant>(CI.getOperand(1))->isNullValue()) {
1568       if (ID == Intrinsic::returnaddress) {
1569         // Just load the return address
1570         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, TmpReg1),
1571                           ReturnAddressIndex);
1572       } else {
1573         addFrameReference(BuildMI(BB, X86::LEA32r, 4, TmpReg1),
1574                           ReturnAddressIndex, -4);
1575       }
1576     } else {
1577       // Values other than zero are not implemented yet.
1578       BuildMI(BB, X86::MOV32ri, 1, TmpReg1).addImm(0);
1579     }
1580     return;
1581
1582   case Intrinsic::memcpy: {
1583     assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
1584     unsigned Align = 1;
1585     if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
1586       Align = AlignC->getRawValue();
1587       if (Align == 0) Align = 1;
1588     }
1589
1590     // Turn the byte code into # iterations
1591     unsigned CountReg;
1592     unsigned Opcode;
1593     switch (Align & 3) {
1594     case 2:   // WORD aligned
1595       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1596         CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
1597       } else {
1598         CountReg = makeAnotherReg(Type::IntTy);
1599         unsigned ByteReg = getReg(CI.getOperand(3));
1600         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
1601       }
1602       Opcode = X86::REP_MOVSW;
1603       break;
1604     case 0:   // DWORD aligned
1605       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1606         CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
1607       } else {
1608         CountReg = makeAnotherReg(Type::IntTy);
1609         unsigned ByteReg = getReg(CI.getOperand(3));
1610         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
1611       }
1612       Opcode = X86::REP_MOVSD;
1613       break;
1614     default:  // BYTE aligned
1615       CountReg = getReg(CI.getOperand(3));
1616       Opcode = X86::REP_MOVSB;
1617       break;
1618     }
1619
1620     // No matter what the alignment is, we put the source in ESI, the
1621     // destination in EDI, and the count in ECX.
1622     TmpReg1 = getReg(CI.getOperand(1));
1623     TmpReg2 = getReg(CI.getOperand(2));
1624     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
1625     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
1626     BuildMI(BB, X86::MOV32rr, 1, X86::ESI).addReg(TmpReg2);
1627     BuildMI(BB, Opcode, 0);
1628     return;
1629   }
1630   case Intrinsic::memset: {
1631     assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!");
1632     unsigned Align = 1;
1633     if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
1634       Align = AlignC->getRawValue();
1635       if (Align == 0) Align = 1;
1636     }
1637
1638     // Turn the byte code into # iterations
1639     unsigned CountReg;
1640     unsigned Opcode;
1641     if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) {
1642       unsigned Val = ValC->getRawValue() & 255;
1643
1644       // If the value is a constant, then we can potentially use larger copies.
1645       switch (Align & 3) {
1646       case 2:   // WORD aligned
1647         if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1648           CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
1649         } else {
1650           CountReg = makeAnotherReg(Type::IntTy);
1651           unsigned ByteReg = getReg(CI.getOperand(3));
1652           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
1653         }
1654         BuildMI(BB, X86::MOV16ri, 1, X86::AX).addImm((Val << 8) | Val);
1655         Opcode = X86::REP_STOSW;
1656         break;
1657       case 0:   // DWORD aligned
1658         if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1659           CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
1660         } else {
1661           CountReg = makeAnotherReg(Type::IntTy);
1662           unsigned ByteReg = getReg(CI.getOperand(3));
1663           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
1664         }
1665         Val = (Val << 8) | Val;
1666         BuildMI(BB, X86::MOV32ri, 1, X86::EAX).addImm((Val << 16) | Val);
1667         Opcode = X86::REP_STOSD;
1668         break;
1669       default:  // BYTE aligned
1670         CountReg = getReg(CI.getOperand(3));
1671         BuildMI(BB, X86::MOV8ri, 1, X86::AL).addImm(Val);
1672         Opcode = X86::REP_STOSB;
1673         break;
1674       }
1675     } else {
1676       // If it's not a constant value we are storing, just fall back.  We could
1677       // try to be clever to form 16 bit and 32 bit values, but we don't yet.
1678       unsigned ValReg = getReg(CI.getOperand(2));
1679       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
1680       CountReg = getReg(CI.getOperand(3));
1681       Opcode = X86::REP_STOSB;
1682     }
1683
1684     // No matter what the alignment is, we put the source in ESI, the
1685     // destination in EDI, and the count in ECX.
1686     TmpReg1 = getReg(CI.getOperand(1));
1687     //TmpReg2 = getReg(CI.getOperand(2));
1688     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
1689     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
1690     BuildMI(BB, Opcode, 0);
1691     return;
1692   }
1693
1694   case Intrinsic::readport: {
1695     // First, determine that the size of the operand falls within the acceptable
1696     // range for this architecture.
1697     //
1698     if (getClassB(CI.getOperand(1)->getType()) != cShort) {
1699       std::cerr << "llvm.readport: Address size is not 16 bits\n";
1700       exit(1);
1701     }
1702
1703     // Now, move the I/O port address into the DX register and use the IN
1704     // instruction to get the input data.
1705     //
1706     unsigned Class = getClass(CI.getCalledFunction()->getReturnType());
1707     unsigned DestReg = getReg(CI);
1708
1709     // If the port is a single-byte constant, use the immediate form.
1710     if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(1)))
1711       if ((C->getRawValue() & 255) == C->getRawValue()) {
1712         switch (Class) {
1713         case cByte:
1714           BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue());
1715           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
1716           return;
1717         case cShort:
1718           BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue());
1719           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX);
1720           return;
1721         case cInt:
1722           BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue());
1723           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX);
1724           return;
1725         }
1726       }
1727
1728     unsigned Reg = getReg(CI.getOperand(1));
1729     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
1730     switch (Class) {
1731     case cByte:
1732       BuildMI(BB, X86::IN8rr, 0);
1733       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
1734       break;
1735     case cShort:
1736       BuildMI(BB, X86::IN16rr, 0);
1737       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX);
1738       break;
1739     case cInt:
1740       BuildMI(BB, X86::IN32rr, 0);
1741       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX);
1742       break;
1743     default:
1744       std::cerr << "Cannot do input on this data type";
1745       exit (1);
1746     }
1747     return;
1748   }
1749
1750   case Intrinsic::writeport: {
1751     // First, determine that the size of the operand falls within the
1752     // acceptable range for this architecture.
1753     if (getClass(CI.getOperand(2)->getType()) != cShort) {
1754       std::cerr << "llvm.writeport: Address size is not 16 bits\n";
1755       exit(1);
1756     }
1757
1758     unsigned Class = getClassB(CI.getOperand(1)->getType());
1759     unsigned ValReg = getReg(CI.getOperand(1));
1760     switch (Class) {
1761     case cByte:
1762       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
1763       break;
1764     case cShort:
1765       BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg);
1766       break;
1767     case cInt:
1768       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg);
1769       break;
1770     default:
1771       std::cerr << "llvm.writeport: invalid data type for X86 target";
1772       exit(1);
1773     }
1774
1775
1776     // If the port is a single-byte constant, use the immediate form.
1777     if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(2)))
1778       if ((C->getRawValue() & 255) == C->getRawValue()) {
1779         static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir };
1780         BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue());
1781         return;
1782       }
1783
1784     // Otherwise, move the I/O port address into the DX register and the value
1785     // to write into the AL/AX/EAX register.
1786     static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr };
1787     unsigned Reg = getReg(CI.getOperand(2));
1788     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
1789     BuildMI(BB, Opc[Class], 0);
1790     return;
1791   }
1792
1793   default: assert(0 && "Error: unknown intrinsics should have been lowered!");
1794   }
1795 }
1796
1797 static bool isSafeToFoldLoadIntoInstruction(LoadInst &LI, Instruction &User) {
1798   if (LI.getParent() != User.getParent())
1799     return false;
1800   BasicBlock::iterator It = &LI;
1801   // Check all of the instructions between the load and the user.  We should
1802   // really use alias analysis here, but for now we just do something simple.
1803   for (++It; It != BasicBlock::iterator(&User); ++It) {
1804     switch (It->getOpcode()) {
1805     case Instruction::Free:
1806     case Instruction::Store:
1807     case Instruction::Call:
1808     case Instruction::Invoke:
1809       return false;
1810     case Instruction::Load:
1811       if (cast<LoadInst>(It)->isVolatile() && LI.isVolatile())
1812         return false;
1813       break;
1814     }
1815   }
1816   return true;
1817 }
1818
1819 /// visitSimpleBinary - Implement simple binary operators for integral types...
1820 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
1821 /// Xor.
1822 ///
1823 void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
1824   unsigned DestReg = getReg(B);
1825   MachineBasicBlock::iterator MI = BB->end();
1826   Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
1827
1828   // Special case: op Reg, load [mem]
1829   if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
1830     if (!B.swapOperands())
1831       std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
1832
1833   unsigned Class = getClassB(B.getType());
1834   if (isa<LoadInst>(Op1) && Class != cLong &&
1835       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) {
1836
1837     unsigned Opcode;
1838     if (Class != cFP) {
1839       static const unsigned OpcodeTab[][3] = {
1840         // Arithmetic operators
1841         { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm },  // ADD
1842         { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm },  // SUB
1843
1844         // Bitwise operators
1845         { X86::AND8rm, X86::AND16rm, X86::AND32rm },  // AND
1846         { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm },  // OR
1847         { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm },  // XOR
1848       };
1849       Opcode = OpcodeTab[OperatorClass][Class];
1850     } else {
1851       static const unsigned OpcodeTab[][2] = {
1852         { X86::FADD32m, X86::FADD64m },  // ADD
1853         { X86::FSUB32m, X86::FSUB64m },  // SUB
1854       };
1855       const Type *Ty = Op0->getType();
1856       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
1857       Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy];
1858     }
1859
1860     unsigned BaseReg, Scale, IndexReg, Disp;
1861     getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), BaseReg,
1862                       Scale, IndexReg, Disp);
1863
1864     unsigned Op0r = getReg(Op0);
1865     addFullAddress(BuildMI(BB, Opcode, 2, DestReg).addReg(Op0r),
1866                    BaseReg, Scale, IndexReg, Disp);
1867     return;
1868   }
1869
1870   // If this is a floating point subtract, check to see if we can fold the first
1871   // operand in.
1872   if (Class == cFP && OperatorClass == 1 &&
1873       isa<LoadInst>(Op0) &&
1874       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) {
1875     const Type *Ty = Op0->getType();
1876     assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
1877     unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m;
1878
1879     unsigned BaseReg, Scale, IndexReg, Disp;
1880     getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), BaseReg,
1881                       Scale, IndexReg, Disp);
1882
1883     unsigned Op1r = getReg(Op1);
1884     addFullAddress(BuildMI(BB, Opcode, 2, DestReg).addReg(Op1r),
1885                    BaseReg, Scale, IndexReg, Disp);
1886     return;
1887   }
1888
1889   emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg);
1890 }
1891
1892
1893 /// emitBinaryFPOperation - This method handles emission of floating point
1894 /// Add (0), Sub (1), Mul (2), and Div (3) operations.
1895 void ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
1896                                  MachineBasicBlock::iterator IP,
1897                                  Value *Op0, Value *Op1,
1898                                  unsigned OperatorClass, unsigned DestReg) {
1899
1900   // Special case: op Reg, <const fp>
1901   if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
1902     if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
1903       // Create a constant pool entry for this constant.
1904       MachineConstantPool *CP = F->getConstantPool();
1905       unsigned CPI = CP->getConstantPoolIndex(Op1C);
1906       const Type *Ty = Op1->getType();
1907
1908       static const unsigned OpcodeTab[][4] = {
1909         { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m },   // Float
1910         { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m },   // Double
1911       };
1912
1913       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
1914       unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
1915       unsigned Op0r = getReg(Op0, BB, IP);
1916       addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
1917                                        DestReg).addReg(Op0r), CPI);
1918       return;
1919     }
1920
1921   // Special case: R1 = op <const fp>, R2
1922   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
1923     if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) {
1924       // -0.0 - X === -X
1925       unsigned op1Reg = getReg(Op1, BB, IP);
1926       BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
1927       return;
1928     } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
1929       // R1 = op CST, R2  -->  R1 = opr R2, CST
1930
1931       // Create a constant pool entry for this constant.
1932       MachineConstantPool *CP = F->getConstantPool();
1933       unsigned CPI = CP->getConstantPoolIndex(CFP);
1934       const Type *Ty = CFP->getType();
1935
1936       static const unsigned OpcodeTab[][4] = {
1937         { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float
1938         { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double
1939       };
1940
1941       assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
1942       unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
1943       unsigned Op1r = getReg(Op1, BB, IP);
1944       addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
1945                                        DestReg).addReg(Op1r), CPI);
1946       return;
1947     }
1948
1949   // General case.
1950   static const unsigned OpcodeTab[4] = {
1951     X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV
1952   };
1953
1954   unsigned Opcode = OpcodeTab[OperatorClass];
1955   unsigned Op0r = getReg(Op0, BB, IP);
1956   unsigned Op1r = getReg(Op1, BB, IP);
1957   BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
1958 }
1959
1960 /// emitSimpleBinaryOperation - Implement simple binary operators for integral
1961 /// types...  OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for
1962 /// Or, 4 for Xor.
1963 ///
1964 /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
1965 /// and constant expression support.
1966 ///
1967 void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
1968                                      MachineBasicBlock::iterator IP,
1969                                      Value *Op0, Value *Op1,
1970                                      unsigned OperatorClass, unsigned DestReg) {
1971   unsigned Class = getClassB(Op0->getType());
1972
1973   if (Class == cFP) {
1974     assert(OperatorClass < 2 && "No logical ops for FP!");
1975     emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg);
1976     return;
1977   }
1978
1979   // sub 0, X -> neg X
1980   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0))
1981     if (OperatorClass == 1 && CI->isNullValue()) {
1982       unsigned op1Reg = getReg(Op1, MBB, IP);
1983       static unsigned const NEGTab[] = {
1984         X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r
1985       };
1986       BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg);
1987
1988       if (Class == cLong) {
1989         // We just emitted: Dl = neg Sl
1990         // Now emit       : T  = addc Sh, 0
1991         //                : Dh = neg T
1992         unsigned T = makeAnotherReg(Type::IntTy);
1993         BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0);
1994         BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T);
1995       }
1996       return;
1997     }
1998
1999   // Special case: op Reg, <const int>
2000   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
2001     unsigned Op0r = getReg(Op0, MBB, IP);
2002
2003     // xor X, -1 -> not X
2004     if (OperatorClass == 4 && Op1C->isAllOnesValue()) {
2005       static unsigned const NOTTab[] = {
2006         X86::NOT8r, X86::NOT16r, X86::NOT32r, 0, X86::NOT32r
2007       };
2008       BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
2009       if (Class == cLong)  // Invert the top part too
2010         BuildMI(*MBB, IP, X86::NOT32r, 1, DestReg+1).addReg(Op0r+1);
2011       return;
2012     }
2013
2014     // add X, -1 -> dec X
2015     if (OperatorClass == 0 && Op1C->isAllOnesValue() && Class != cLong) {
2016       // Note that we can't use dec for 64-bit decrements, because it does not
2017       // set the carry flag!
2018       static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
2019       BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
2020       return;
2021     }
2022
2023     // add X, 1 -> inc X
2024     if (OperatorClass == 0 && Op1C->equalsInt(1) && Class != cLong) {
2025       // Note that we can't use inc for 64-bit increments, because it does not
2026       // set the carry flag!
2027       static unsigned const INCTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
2028       BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r);
2029       return;
2030     }
2031
2032     static const unsigned OpcodeTab[][5] = {
2033       // Arithmetic operators
2034       { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri },  // ADD
2035       { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, X86::SUB32ri },  // SUB
2036
2037       // Bitwise operators
2038       { X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, X86::AND32ri },  // AND
2039       { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri, 0, X86::OR32ri  },  // OR
2040       { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, X86::XOR32ri },  // XOR
2041     };
2042
2043     unsigned Opcode = OpcodeTab[OperatorClass][Class];
2044     unsigned Op1l = cast<ConstantInt>(Op1C)->getRawValue();
2045
2046     if (Class != cLong) {
2047       BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
2048       return;
2049     }
2050
2051     // If this is a long value and the high or low bits have a special
2052     // property, emit some special cases.
2053     unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
2054
2055     // If the constant is zero in the low 32-bits, just copy the low part
2056     // across and apply the normal 32-bit operation to the high parts.  There
2057     // will be no carry or borrow into the top.
2058     if (Op1l == 0) {
2059       if (OperatorClass != 2) // All but and...
2060         BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r);
2061       else
2062         BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
2063       BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1)
2064         .addReg(Op0r+1).addImm(Op1h);
2065       return;
2066     }
2067
2068     // If this is a logical operation and the top 32-bits are zero, just
2069     // operate on the lower 32.
2070     if (Op1h == 0 && OperatorClass > 1) {
2071       BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg)
2072         .addReg(Op0r).addImm(Op1l);
2073       if (OperatorClass != 2)  // All but and
2074         BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1);
2075       else
2076         BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
2077       return;
2078     }
2079
2080     // TODO: We could handle lots of other special cases here, such as AND'ing
2081     // with 0xFFFFFFFF00000000 -> noop, etc.
2082
2083     // Otherwise, code generate the full operation with a constant.
2084     static const unsigned TopTab[] = {
2085       X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
2086     };
2087
2088     BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
2089     BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
2090       .addReg(Op0r+1).addImm(Op1h);
2091     return;
2092   }
2093
2094   // Finally, handle the general case now.
2095   static const unsigned OpcodeTab[][5] = {
2096     // Arithmetic operators
2097     { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr },  // ADD
2098     { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr },  // SUB
2099
2100     // Bitwise operators
2101     { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr },  // AND
2102     { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0, X86:: OR32rr },  // OR
2103     { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, X86::XOR32rr },  // XOR
2104   };
2105
2106   unsigned Opcode = OpcodeTab[OperatorClass][Class];
2107   unsigned Op0r = getReg(Op0, MBB, IP);
2108   unsigned Op1r = getReg(Op1, MBB, IP);
2109   BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
2110
2111   if (Class == cLong) {        // Handle the upper 32 bits of long values...
2112     static const unsigned TopTab[] = {
2113       X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
2114     };
2115     BuildMI(*MBB, IP, TopTab[OperatorClass], 2,
2116             DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
2117   }
2118 }
2119
2120 /// doMultiply - Emit appropriate instructions to multiply together the
2121 /// registers op0Reg and op1Reg, and put the result in DestReg.  The type of the
2122 /// result should be given as DestTy.
2123 ///
2124 void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
2125                       unsigned DestReg, const Type *DestTy,
2126                       unsigned op0Reg, unsigned op1Reg) {
2127   unsigned Class = getClass(DestTy);
2128   switch (Class) {
2129   case cInt:
2130   case cShort:
2131     BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg)
2132       .addReg(op0Reg).addReg(op1Reg);
2133     return;
2134   case cByte:
2135     // Must use the MUL instruction, which forces use of AL...
2136     BuildMI(*MBB, MBBI, X86::MOV8rr, 1, X86::AL).addReg(op0Reg);
2137     BuildMI(*MBB, MBBI, X86::MUL8r, 1).addReg(op1Reg);
2138     BuildMI(*MBB, MBBI, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
2139     return;
2140   default:
2141   case cLong: assert(0 && "doMultiply cannot operate on LONG values!");
2142   }
2143 }
2144
// ExactLog2 - Solve Val == 1 << (N-1) for N and return it, i.e. the base-2
// logarithm of Val plus one.  Returns zero when Val is zero or is not exactly
// a power of two.
static unsigned ExactLog2(unsigned Val) {
  // A power of two has exactly one bit set, so clearing its lowest set bit
  // must leave nothing behind.
  if (Val == 0 || (Val & (Val - 1)) != 0)
    return 0;

  // Count the position of that single bit, starting from 1.
  unsigned N = 1;
  for (; Val > 1; Val >>= 1)
    ++N;
  return N;
}
2157
2158
2159 /// doMultiplyConst - This function is specialized to efficiently codegen an 8,
2160 /// 16, or 32-bit integer multiply by a constant.
2161 void ISel::doMultiplyConst(MachineBasicBlock *MBB,
2162                            MachineBasicBlock::iterator IP,
2163                            unsigned DestReg, const Type *DestTy,
2164                            unsigned op0Reg, unsigned ConstRHS) {
2165   static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
2166   static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
2167
2168   unsigned Class = getClass(DestTy);
2169
2170   if (ConstRHS == 0) {
2171     BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
2172     return;
2173   } else if (ConstRHS == 1) {
2174     BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
2175     return;
2176   }
2177
2178   // If the element size is exactly a power of 2, use a shift to get it.
2179   if (unsigned Shift = ExactLog2(ConstRHS)) {
2180     switch (Class) {
2181     default: assert(0 && "Unknown class for this function!");
2182     case cByte:
2183       BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
2184       return;
2185     case cShort:
2186       BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
2187       return;
2188     case cInt:
2189       BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
2190       return;
2191     }
2192   }
2193
2194   if (Class == cShort) {
2195     BuildMI(*MBB, IP, X86::IMUL16rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
2196     return;
2197   } else if (Class == cInt) {
2198     BuildMI(*MBB, IP, X86::IMUL32rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
2199     return;
2200   }
2201
2202   // Most general case, emit a normal multiply...
2203   unsigned TmpReg = makeAnotherReg(DestTy);
2204   BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
2205
2206   // Emit a MUL to multiply the register holding the index by
2207   // elementSize, putting the result in OffsetReg.
2208   doMultiply(MBB, IP, DestReg, DestTy, op0Reg, TmpReg);
2209 }
2210
2211 /// visitMul - Multiplies are not simple binary operators because they must deal
2212 /// with the EAX register explicitly.
2213 ///
2214 void ISel::visitMul(BinaryOperator &I) {
2215   unsigned ResultReg = getReg(I);
2216
2217   Value *Op0 = I.getOperand(0);
2218   Value *Op1 = I.getOperand(1);
2219
2220   // Fold loads into floating point multiplies.
2221   if (getClass(Op0->getType()) == cFP) {
2222     if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
2223       if (!I.swapOperands())
2224         std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
2225     if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
2226       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
2227         const Type *Ty = Op0->getType();
2228         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2229         unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
2230
2231         unsigned BaseReg, Scale, IndexReg, Disp;
2232         getAddressingMode(LI->getOperand(0), BaseReg,
2233                           Scale, IndexReg, Disp);
2234
2235         unsigned Op0r = getReg(Op0);
2236         addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
2237                        BaseReg, Scale, IndexReg, Disp);
2238         return;
2239       }
2240   }
2241
2242   MachineBasicBlock::iterator IP = BB->end();
2243   emitMultiply(BB, IP, Op0, Op1, ResultReg);
2244 }
2245
2246 void ISel::emitMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
2247                         Value *Op0, Value *Op1, unsigned DestReg) {
2248   MachineBasicBlock &BB = *MBB;
2249   TypeClass Class = getClass(Op0->getType());
2250
2251   // Simple scalar multiply?
2252   unsigned Op0Reg  = getReg(Op0, &BB, IP);
2253   switch (Class) {
2254   case cByte:
2255   case cShort:
2256   case cInt:
2257     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
2258       unsigned Val = (unsigned)CI->getRawValue(); // Isn't a 64-bit constant
2259       doMultiplyConst(&BB, IP, DestReg, Op0->getType(), Op0Reg, Val);
2260     } else {
2261       unsigned Op1Reg  = getReg(Op1, &BB, IP);
2262       doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg);
2263     }
2264     return;
2265   case cFP:
2266     emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg);
2267     return;
2268   case cLong:
2269     break;
2270   }
2271
2272   // Long value.  We have to do things the hard way...
2273   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
2274     unsigned CLow = CI->getRawValue();
2275     unsigned CHi  = CI->getRawValue() >> 32;
2276
2277     if (CLow == 0) {
2278       // If the low part of the constant is all zeros, things are simple.
2279       BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
2280       doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi);
2281       return;
2282     }
2283
2284     // Multiply the two low parts... capturing carry into EDX
2285     unsigned OverflowReg = 0;
2286     if (CLow == 1) {
2287       BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
2288     } else {
2289       unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
2290       OverflowReg = makeAnotherReg(Type::UIntTy);
2291       BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
2292       BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
2293       BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL);  // AL*BL
2294
2295       BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);   // AL*BL
2296       BuildMI(BB, IP, X86::MOV32rr, 1,
2297               OverflowReg).addReg(X86::EDX);                    // AL*BL >> 32
2298     }
2299
2300     unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
2301     doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
2302
2303     unsigned AHBLplusOverflowReg;
2304     if (OverflowReg) {
2305       AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
2306       BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
2307               AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
2308     } else {
2309       AHBLplusOverflowReg = AHBLReg;
2310     }
2311
2312     if (CHi == 0) {
2313       BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
2314     } else {
2315       unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
2316       doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi);
2317
2318       BuildMI(BB, IP, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
2319               DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
2320     }
2321     return;
2322   }
2323
2324   // General 64x64 multiply
2325
2326   unsigned Op1Reg  = getReg(Op1, &BB, IP);
2327   // Multiply the two low parts... capturing carry into EDX
2328   BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
2329   BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg);  // AL*BL
2330
2331   unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
2332   BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);     // AL*BL
2333   BuildMI(BB, IP, X86::MOV32rr, 1,
2334           OverflowReg).addReg(X86::EDX); // AL*BL >> 32
2335
2336   unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
2337   BuildMI(BB, IP, X86::IMUL32rr, 2,
2338           AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
2339
2340   unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
2341   BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
2342           AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
2343
2344   unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
2345   BuildMI(BB, IP, X86::IMUL32rr, 2,
2346           ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
2347
2348   BuildMI(BB, IP, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
2349           DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
2350 }
2351
2352
2353 /// visitDivRem - Handle division and remainder instructions... these
2354 /// instruction both require the same instructions to be generated, they just
2355 /// select the result from a different register.  Note that both of these
2356 /// instructions work differently for signed and unsigned operands.
2357 ///
2358 void ISel::visitDivRem(BinaryOperator &I) {
2359   unsigned ResultReg = getReg(I);
2360   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2361
2362   // Fold loads into floating point divides.
2363   if (getClass(Op0->getType()) == cFP) {
2364     if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
2365       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
2366         const Type *Ty = Op0->getType();
2367         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2368         unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
2369
2370         unsigned BaseReg, Scale, IndexReg, Disp;
2371         getAddressingMode(LI->getOperand(0), BaseReg,
2372                           Scale, IndexReg, Disp);
2373
2374         unsigned Op0r = getReg(Op0);
2375         addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
2376                        BaseReg, Scale, IndexReg, Disp);
2377         return;
2378       }
2379
2380     if (LoadInst *LI = dyn_cast<LoadInst>(Op0))
2381       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
2382         const Type *Ty = Op0->getType();
2383         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2384         unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m;
2385
2386         unsigned BaseReg, Scale, IndexReg, Disp;
2387         getAddressingMode(LI->getOperand(0), BaseReg,
2388                           Scale, IndexReg, Disp);
2389
2390         unsigned Op1r = getReg(Op1);
2391         addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op1r),
2392                        BaseReg, Scale, IndexReg, Disp);
2393         return;
2394       }
2395   }
2396
2397
2398   MachineBasicBlock::iterator IP = BB->end();
2399   emitDivRemOperation(BB, IP, Op0, Op1,
2400                       I.getOpcode() == Instruction::Div, ResultReg);
2401 }
2402
2403 void ISel::emitDivRemOperation(MachineBasicBlock *BB,
2404                                MachineBasicBlock::iterator IP,
2405                                Value *Op0, Value *Op1, bool isDiv,
2406                                unsigned ResultReg) {
2407   const Type *Ty = Op0->getType();
2408   unsigned Class = getClass(Ty);
2409   switch (Class) {
2410   case cFP:              // Floating point divide
2411     if (isDiv) {
2412       emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg);
2413       return;
2414     } else {               // Floating point remainder...
2415       unsigned Op0Reg = getReg(Op0, BB, IP);
2416       unsigned Op1Reg = getReg(Op1, BB, IP);
2417       MachineInstr *TheCall =
2418         BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
2419       std::vector<ValueRecord> Args;
2420       Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy));
2421       Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy));
2422       doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args);
2423     }
2424     return;
2425   case cLong: {
2426     static const char *FnName[] =
2427       { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
2428     unsigned Op0Reg = getReg(Op0, BB, IP);
2429     unsigned Op1Reg = getReg(Op1, BB, IP);
2430     unsigned NameIdx = Ty->isUnsigned()*2 + isDiv;
2431     MachineInstr *TheCall =
2432       BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
2433
2434     std::vector<ValueRecord> Args;
2435     Args.push_back(ValueRecord(Op0Reg, Type::LongTy));
2436     Args.push_back(ValueRecord(Op1Reg, Type::LongTy));
2437     doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args);
2438     return;
2439   }
2440   case cByte: case cShort: case cInt:
2441     break;          // Small integrals, handled below...
2442   default: assert(0 && "Unknown class!");
2443   }
2444
2445   static const unsigned Regs[]     ={ X86::AL    , X86::AX     , X86::EAX     };
2446   static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
2447   static const unsigned SarOpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
2448   static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
2449   static const unsigned ExtRegs[]  ={ X86::AH    , X86::DX     , X86::EDX     };
2450
2451   static const unsigned DivOpcode[][4] = {
2452     { X86::DIV8r , X86::DIV16r , X86::DIV32r , 0 },  // Unsigned division
2453     { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 },  // Signed division
2454   };
2455
2456   bool isSigned   = Ty->isSigned();
2457   unsigned Reg    = Regs[Class];
2458   unsigned ExtReg = ExtRegs[Class];
2459
2460   // Put the first operand into one of the A registers...
2461   unsigned Op0Reg = getReg(Op0, BB, IP);
2462   unsigned Op1Reg = getReg(Op1, BB, IP);
2463   BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
2464
2465   if (isSigned) {
2466     // Emit a sign extension instruction...
2467     unsigned ShiftResult = makeAnotherReg(Op0->getType());
2468     BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
2469     BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
2470   } else {
2471     // If unsigned, emit a zeroing instruction... (reg = 0)
2472     BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0);
2473   }
2474
2475   // Emit the appropriate divide or remainder instruction...
2476   BuildMI(*BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg);
2477
2478   // Figure out which register we want to pick the result out of...
2479   unsigned DestReg = isDiv ? Reg : ExtReg;
2480
2481   // Put the result into the destination register...
2482   BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg);
2483 }
2484
2485
2486 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here
2487 /// for constant immediate shift values, and for constant immediate
2488 /// shift values equal to 1. Even the general case is sort of special,
2489 /// because the shift amount has to be in CL, not just any old register.
2490 ///
2491 void ISel::visitShiftInst(ShiftInst &I) {
2492   MachineBasicBlock::iterator IP = BB->end ();
2493   emitShiftOperation (BB, IP, I.getOperand (0), I.getOperand (1),
2494                       I.getOpcode () == Instruction::Shl, I.getType (),
2495                       getReg (I));
2496 }
2497
2498 /// emitShiftOperation - Common code shared between visitShiftInst and
2499 /// constant expression support.
2500 void ISel::emitShiftOperation(MachineBasicBlock *MBB,
2501                               MachineBasicBlock::iterator IP,
2502                               Value *Op, Value *ShiftAmount, bool isLeftShift,
2503                               const Type *ResultTy, unsigned DestReg) {
2504   unsigned SrcReg = getReg (Op, MBB, IP);
2505   bool isSigned = ResultTy->isSigned ();
2506   unsigned Class = getClass (ResultTy);
2507
2508   static const unsigned ConstantOperand[][4] = {
2509     { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri, X86::SHRD32rri8 },  // SHR
2510     { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri, X86::SHRD32rri8 },  // SAR
2511     { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 },  // SHL
2512     { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri, X86::SHLD32rri8 },  // SAL = SHL
2513   };
2514
2515   static const unsigned NonConstantOperand[][4] = {
2516     { X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL },  // SHR
2517     { X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL },  // SAR
2518     { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SHL
2519     { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SAL = SHL
2520   };
2521
2522   // Longs, as usual, are handled specially...
2523   if (Class == cLong) {
2524     // If we have a constant shift, we can generate much more efficient code
2525     // than otherwise...
2526     //
2527     if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
2528       unsigned Amount = CUI->getValue();
2529       if (Amount < 32) {
2530         const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
2531         if (isLeftShift) {
2532           BuildMI(*MBB, IP, Opc[3], 3,
2533               DestReg+1).addReg(SrcReg+1).addReg(SrcReg).addImm(Amount);
2534           BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount);
2535         } else {
2536           BuildMI(*MBB, IP, Opc[3], 3,
2537               DestReg).addReg(SrcReg  ).addReg(SrcReg+1).addImm(Amount);
2538           BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount);
2539         }
2540       } else {                 // Shifting more than 32 bits
2541         Amount -= 32;
2542         if (isLeftShift) {
2543           if (Amount != 0) {
2544             BuildMI(*MBB, IP, X86::SHL32ri, 2,
2545                     DestReg + 1).addReg(SrcReg).addImm(Amount);
2546           } else {
2547             BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
2548           }
2549           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
2550         } else {
2551           if (Amount != 0) {
2552             BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
2553                     DestReg).addReg(SrcReg+1).addImm(Amount);
2554           } else {
2555             BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
2556           }
2557           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
2558         }
2559       }
2560     } else {
2561       unsigned TmpReg = makeAnotherReg(Type::IntTy);
2562
2563       if (!isLeftShift && isSigned) {
2564         // If this is a SHR of a Long, then we need to do funny sign extension
2565         // stuff.  TmpReg gets the value to use as the high-part if we are
2566         // shifting more than 32 bits.
2567         BuildMI(*MBB, IP, X86::SAR32ri, 2, TmpReg).addReg(SrcReg).addImm(31);
2568       } else {
2569         // Other shifts use a fixed zero value if the shift is more than 32
2570         // bits.
2571         BuildMI(*MBB, IP, X86::MOV32ri, 1, TmpReg).addImm(0);
2572       }
2573
2574       // Initialize CL with the shift amount...
2575       unsigned ShiftAmountReg = getReg(ShiftAmount, MBB, IP);
2576       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
2577
2578       unsigned TmpReg2 = makeAnotherReg(Type::IntTy);
2579       unsigned TmpReg3 = makeAnotherReg(Type::IntTy);
2580       if (isLeftShift) {
2581         // TmpReg2 = shld inHi, inLo
2582         BuildMI(*MBB, IP, X86::SHLD32rrCL,2,TmpReg2).addReg(SrcReg+1)
2583                                                     .addReg(SrcReg);
2584         // TmpReg3 = shl  inLo, CL
2585         BuildMI(*MBB, IP, X86::SHL32rCL, 1, TmpReg3).addReg(SrcReg);
2586
2587         // Set the flags to indicate whether the shift was by more than 32 bits.
2588         BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
2589
2590         // DestHi = (>32) ? TmpReg3 : TmpReg2;
2591         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
2592                 DestReg+1).addReg(TmpReg2).addReg(TmpReg3);
2593         // DestLo = (>32) ? TmpReg : TmpReg3;
2594         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
2595             DestReg).addReg(TmpReg3).addReg(TmpReg);
2596       } else {
2597         // TmpReg2 = shrd inLo, inHi
2598         BuildMI(*MBB, IP, X86::SHRD32rrCL,2,TmpReg2).addReg(SrcReg)
2599                                                     .addReg(SrcReg+1);
2600         // TmpReg3 = s[ah]r  inHi, CL
2601         BuildMI(*MBB, IP, isSigned ? X86::SAR32rCL : X86::SHR32rCL, 1, TmpReg3)
2602                        .addReg(SrcReg+1);
2603
2604         // Set the flags to indicate whether the shift was by more than 32 bits.
2605         BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
2606
2607         // DestLo = (>32) ? TmpReg3 : TmpReg2;
2608         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
2609                 DestReg).addReg(TmpReg2).addReg(TmpReg3);
2610
2611         // DestHi = (>32) ? TmpReg : TmpReg3;
2612         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
2613                 DestReg+1).addReg(TmpReg3).addReg(TmpReg);
2614       }
2615     }
2616     return;
2617   }
2618
2619   if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
2620     // The shift amount is constant, guaranteed to be a ubyte. Get its value.
2621     assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
2622
2623     const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
2624     BuildMI(*MBB, IP, Opc[Class], 2,
2625         DestReg).addReg(SrcReg).addImm(CUI->getValue());
2626   } else {                  // The shift amount is non-constant.
2627     unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP);
2628     BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
2629
2630     const unsigned *Opc = NonConstantOperand[isLeftShift*2+isSigned];
2631     BuildMI(*MBB, IP, Opc[Class], 1, DestReg).addReg(SrcReg);
2632   }
2633 }
2634
2635
2636 void ISel::getAddressingMode(Value *Addr, unsigned &BaseReg, unsigned &Scale,
2637                              unsigned &IndexReg, unsigned &Disp) {
2638   BaseReg = 0; Scale = 1; IndexReg = 0; Disp = 0;
2639   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
2640     if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
2641                        BaseReg, Scale, IndexReg, Disp))
2642       return;
2643   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
2644     if (CE->getOpcode() == Instruction::GetElementPtr)
2645       if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
2646                         BaseReg, Scale, IndexReg, Disp))
2647         return;
2648   }
2649
2650   // If it's not foldable, reset addr mode.
2651   BaseReg = getReg(Addr);
2652   Scale = 1; IndexReg = 0; Disp = 0;
2653 }
2654
2655
2656 /// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
2657 /// instruction.  The load and store instructions are the only place where we
2658 /// need to worry about the memory layout of the target machine.
2659 ///
2660 void ISel::visitLoadInst(LoadInst &I) {
2661   // Check to see if this load instruction is going to be folded into a binary
2662   // instruction, like add.  If so, we don't want to emit it.  Wouldn't a real
2663   // pattern matching instruction selector be nice?
2664   unsigned Class = getClassB(I.getType());
2665   if (I.hasOneUse()) {
2666     Instruction *User = cast<Instruction>(I.use_back());
2667     switch (User->getOpcode()) {
2668     case Instruction::Cast:
2669       // If this is a cast from a signed-integer type to a floating point type,
2670       // fold the cast here.
2671       if (getClass(User->getType()) == cFP &&
2672           (I.getType() == Type::ShortTy || I.getType() == Type::IntTy ||
2673            I.getType() == Type::LongTy)) {
2674         unsigned DestReg = getReg(User);
2675         static const unsigned Opcode[] = {
2676           0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m
2677         };
2678         unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
2679         getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
2680         addFullAddress(BuildMI(BB, Opcode[Class], 5, DestReg),
2681                        BaseReg, Scale, IndexReg, Disp);
2682         return;
2683       } else {
2684         User = 0;
2685       }
2686       break;
2687
2688     case Instruction::Add:
2689     case Instruction::Sub:
2690     case Instruction::And:
2691     case Instruction::Or:
2692     case Instruction::Xor:
2693       if (Class == cLong) User = 0;
2694       break;
2695     case Instruction::Mul:
2696     case Instruction::Div:
2697       if (Class != cFP) User = 0;
2698       break;  // Folding only implemented for floating point.
2699     default: User = 0; break;
2700     }
2701
2702     if (User) {
2703       // Okay, we found a user.  If the load is the first operand and there is
2704       // no second operand load, reverse the operand ordering.  Note that this
2705       // can fail for a subtract (ie, no change will be made).
2706       if (!isa<LoadInst>(User->getOperand(1)))
2707         cast<BinaryOperator>(User)->swapOperands();
2708
2709       // Okay, now that everything is set up, if this load is used by the second
2710       // operand, and if there are no instructions that invalidate the load
2711       // before the binary operator, eliminate the load.
2712       if (User->getOperand(1) == &I &&
2713           isSafeToFoldLoadIntoInstruction(I, *User))
2714         return;   // Eliminate the load!
2715
2716       // If this is a floating point sub or div, we won't be able to swap the
2717       // operands, but we will still be able to eliminate the load.
2718       if (Class == cFP && User->getOperand(0) == &I &&
2719           !isa<LoadInst>(User->getOperand(1)) &&
2720           (User->getOpcode() == Instruction::Sub ||
2721            User->getOpcode() == Instruction::Div) &&
2722           isSafeToFoldLoadIntoInstruction(I, *User))
2723         return;  // Eliminate the load!
2724     }
2725   }
2726
2727   unsigned DestReg = getReg(I);
2728   unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
2729   getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
2730
2731   if (Class == cLong) {
2732     addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
2733                    BaseReg, Scale, IndexReg, Disp);
2734     addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg+1),
2735                    BaseReg, Scale, IndexReg, Disp+4);
2736     return;
2737   }
2738
2739   static const unsigned Opcodes[] = {
2740     X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD32m
2741   };
2742   unsigned Opcode = Opcodes[Class];
2743   if (I.getType() == Type::DoubleTy) Opcode = X86::FLD64m;
2744   addFullAddress(BuildMI(BB, Opcode, 4, DestReg),
2745                  BaseReg, Scale, IndexReg, Disp);
2746 }
2747
2748 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
2749 /// instruction.
2750 ///
2751 void ISel::visitStoreInst(StoreInst &I) {
2752   unsigned BaseReg, Scale, IndexReg, Disp;
2753   getAddressingMode(I.getOperand(1), BaseReg, Scale, IndexReg, Disp);
2754
2755   const Type *ValTy = I.getOperand(0)->getType();
2756   unsigned Class = getClassB(ValTy);
2757
2758   if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
2759     uint64_t Val = CI->getRawValue();
2760     if (Class == cLong) {
2761       addFullAddress(BuildMI(BB, X86::MOV32mi, 5),
2762                      BaseReg, Scale, IndexReg, Disp).addImm(Val & ~0U);
2763       addFullAddress(BuildMI(BB, X86::MOV32mi, 5),
2764                      BaseReg, Scale, IndexReg, Disp+4).addImm(Val>>32);
2765     } else {
2766       static const unsigned Opcodes[] = {
2767         X86::MOV8mi, X86::MOV16mi, X86::MOV32mi
2768       };
2769       unsigned Opcode = Opcodes[Class];
2770       addFullAddress(BuildMI(BB, Opcode, 5),
2771                      BaseReg, Scale, IndexReg, Disp).addImm(Val);
2772     }
2773   } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
2774     addFullAddress(BuildMI(BB, X86::MOV8mi, 5),
2775                    BaseReg, Scale, IndexReg, Disp).addImm(CB->getValue());
2776   } else {
2777     if (Class == cLong) {
2778       unsigned ValReg = getReg(I.getOperand(0));
2779       addFullAddress(BuildMI(BB, X86::MOV32mr, 5),
2780                      BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
2781       addFullAddress(BuildMI(BB, X86::MOV32mr, 5),
2782                      BaseReg, Scale, IndexReg, Disp+4).addReg(ValReg+1);
2783     } else {
2784       unsigned ValReg = getReg(I.getOperand(0));
2785       static const unsigned Opcodes[] = {
2786         X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST32m
2787       };
2788       unsigned Opcode = Opcodes[Class];
2789       if (ValTy == Type::DoubleTy) Opcode = X86::FST64m;
2790       addFullAddress(BuildMI(BB, Opcode, 1+4),
2791                      BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
2792     }
2793   }
2794 }
2795
2796
2797 /// visitCastInst - Here we have various kinds of copying with or without sign
2798 /// extension going on.
2799 ///
2800 void ISel::visitCastInst(CastInst &CI) {
2801   Value *Op = CI.getOperand(0);
2802
2803   unsigned SrcClass = getClassB(Op->getType());
2804   unsigned DestClass = getClassB(CI.getType());
2805   // Noop casts are not emitted: getReg will return the source operand as the
2806   // register to use for any uses of the noop cast.
2807   if (DestClass == SrcClass)
2808     return;
2809
2810   // If this is a cast from a 32-bit integer to a Long type, and the only uses
2811   // of the case are GEP instructions, then the cast does not need to be
2812   // generated explicitly, it will be folded into the GEP.
2813   if (DestClass == cLong && SrcClass == cInt) {
2814     bool AllUsesAreGEPs = true;
2815     for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I)
2816       if (!isa<GetElementPtrInst>(*I)) {
2817         AllUsesAreGEPs = false;
2818         break;
2819       }
2820
2821     // No need to codegen this cast if all users are getelementptr instrs...
2822     if (AllUsesAreGEPs) return;
2823   }
2824
2825   // If this cast converts a load from a short,int, or long integer to a FP
2826   // value, we will have folded this cast away.
2827   if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() &&
2828       (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy ||
2829        Op->getType() == Type::LongTy))
2830     return;
2831
2832
2833   unsigned DestReg = getReg(CI);
2834   MachineBasicBlock::iterator MI = BB->end();
2835   emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
2836 }
2837
2838 /// emitCastOperation - Common code shared between visitCastInst and constant
2839 /// expression cast support.
2840 ///
2841 void ISel::emitCastOperation(MachineBasicBlock *BB,
2842                              MachineBasicBlock::iterator IP,
2843                              Value *Src, const Type *DestTy,
2844                              unsigned DestReg) {
2845   const Type *SrcTy = Src->getType();
2846   unsigned SrcClass = getClassB(SrcTy);
2847   unsigned DestClass = getClassB(DestTy);
2848   unsigned SrcReg = getReg(Src, BB, IP);
2849
2850   // Implement casts to bool by using compare on the operand followed by set if
2851   // not zero on the result.
2852   if (DestTy == Type::BoolTy) {
2853     switch (SrcClass) {
2854     case cByte:
2855       BuildMI(*BB, IP, X86::TEST8rr, 2).addReg(SrcReg).addReg(SrcReg);
2856       break;
2857     case cShort:
2858       BuildMI(*BB, IP, X86::TEST16rr, 2).addReg(SrcReg).addReg(SrcReg);
2859       break;
2860     case cInt:
2861       BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg).addReg(SrcReg);
2862       break;
2863     case cLong: {
2864       unsigned TmpReg = makeAnotherReg(Type::IntTy);
2865       BuildMI(*BB, IP, X86::OR32rr, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1);
2866       break;
2867     }
2868     case cFP:
2869       BuildMI(*BB, IP, X86::FTST, 1).addReg(SrcReg);
2870       BuildMI(*BB, IP, X86::FNSTSW8r, 0);
2871       BuildMI(*BB, IP, X86::SAHF, 1);
2872       break;
2873     }
2874
2875     // If the zero flag is not set, then the value is true, set the byte to
2876     // true.
2877     BuildMI(*BB, IP, X86::SETNEr, 1, DestReg);
2878     return;
2879   }
2880
2881   static const unsigned RegRegMove[] = {
2882     X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
2883   };
2884
2885   // Implement casts between values of the same type class (as determined by
2886   // getClass) by using a register-to-register move.
2887   if (SrcClass == DestClass) {
2888     if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) {
2889       BuildMI(*BB, IP, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
2890     } else if (SrcClass == cFP) {
2891       if (SrcTy == Type::FloatTy) {  // double -> float
2892         assert(DestTy == Type::DoubleTy && "Unknown cFP member!");
2893         BuildMI(*BB, IP, X86::FpMOV, 1, DestReg).addReg(SrcReg);
2894       } else {                       // float -> double
2895         assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy &&
2896                "Unknown cFP member!");
2897         // Truncate from double to float by storing to memory as short, then
2898         // reading it back.
2899         unsigned FltAlign = TM.getTargetData().getFloatAlignment();
2900         int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
2901         addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5), FrameIdx).addReg(SrcReg);
2902         addFrameReference(BuildMI(*BB, IP, X86::FLD32m, 5, DestReg), FrameIdx);
2903       }
2904     } else if (SrcClass == cLong) {
2905       BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
2906       BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg+1);
2907     } else {
2908       assert(0 && "Cannot handle this type of cast instruction!");
2909       abort();
2910     }
2911     return;
2912   }
2913
2914   // Handle cast of SMALLER int to LARGER int using a move with sign extension
2915   // or zero extension, depending on whether the source type was signed.
2916   if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) &&
2917       SrcClass < DestClass) {
2918     bool isLong = DestClass == cLong;
2919     if (isLong) DestClass = cInt;
2920
2921     static const unsigned Opc[][4] = {
2922       { X86::MOVSX16rr8, X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOV32rr }, // s
2923       { X86::MOVZX16rr8, X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOV32rr }  // u
2924     };
2925
2926     bool isUnsigned = SrcTy->isUnsigned();
2927     BuildMI(*BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
2928         DestReg).addReg(SrcReg);
2929
2930     if (isLong) {  // Handle upper 32 bits as appropriate...
2931       if (isUnsigned)     // Zero out top bits...
2932         BuildMI(*BB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
2933       else                // Sign extend bottom half...
2934         BuildMI(*BB, IP, X86::SAR32ri, 2, DestReg+1).addReg(DestReg).addImm(31);
2935     }
2936     return;
2937   }
2938
2939   // Special case long -> int ...
2940   if (SrcClass == cLong && DestClass == cInt) {
2941     BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
2942     return;
2943   }
2944
2945   // Handle cast of LARGER int to SMALLER int using a move to EAX followed by a
2946   // move out of AX or AL.
2947   if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt
2948       && SrcClass > DestClass) {
2949     static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX, 0, X86::EAX };
2950     BuildMI(*BB, IP, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
2951     BuildMI(*BB, IP, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
2952     return;
2953   }
2954
2955   // Handle casts from integer to floating point now...
2956   if (DestClass == cFP) {
2957     // Promote the integer to a type supported by FLD.  We do this because there
2958     // are no unsigned FLD instructions, so we must promote an unsigned value to
2959     // a larger signed value, then use FLD on the larger value.
2960     //
2961     const Type *PromoteType = 0;
2962     unsigned PromoteOpcode = 0;
2963     unsigned RealDestReg = DestReg;
2964     switch (SrcTy->getPrimitiveID()) {
2965     case Type::BoolTyID:
2966     case Type::SByteTyID:
2967       // We don't have the facilities for directly loading byte sized data from
2968       // memory (even signed).  Promote it to 16 bits.
2969       PromoteType = Type::ShortTy;
2970       PromoteOpcode = X86::MOVSX16rr8;
2971       break;
2972     case Type::UByteTyID:
2973       PromoteType = Type::ShortTy;
2974       PromoteOpcode = X86::MOVZX16rr8;
2975       break;
2976     case Type::UShortTyID:
2977       PromoteType = Type::IntTy;
2978       PromoteOpcode = X86::MOVZX32rr16;
2979       break;
2980     case Type::UIntTyID: {
2981       // Make a 64 bit temporary... and zero out the top of it...
2982       unsigned TmpReg = makeAnotherReg(Type::LongTy);
2983       BuildMI(*BB, IP, X86::MOV32rr, 1, TmpReg).addReg(SrcReg);
2984       BuildMI(*BB, IP, X86::MOV32ri, 1, TmpReg+1).addImm(0);
2985       SrcTy = Type::LongTy;
2986       SrcClass = cLong;
2987       SrcReg = TmpReg;
2988       break;
2989     }
2990     case Type::ULongTyID:
2991       // Don't fild into the read destination.
2992       DestReg = makeAnotherReg(Type::DoubleTy);
2993       break;
2994     default:  // No promotion needed...
2995       break;
2996     }
2997
2998     if (PromoteType) {
2999       unsigned TmpReg = makeAnotherReg(PromoteType);
3000       BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg);
3001       SrcTy = PromoteType;
3002       SrcClass = getClass(PromoteType);
3003       SrcReg = TmpReg;
3004     }
3005
3006     // Spill the integer to memory and reload it from there...
3007     int FrameIdx =
3008       F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
3009
3010     if (SrcClass == cLong) {
3011       addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
3012                         FrameIdx).addReg(SrcReg);
3013       addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
3014                         FrameIdx, 4).addReg(SrcReg+1);
3015     } else {
3016       static const unsigned Op1[] = { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr };
3017       addFrameReference(BuildMI(*BB, IP, Op1[SrcClass], 5),
3018                         FrameIdx).addReg(SrcReg);
3019     }
3020
3021     static const unsigned Op2[] =
3022       { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
3023     addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
3024
3025     // We need special handling for unsigned 64-bit integer sources.  If the
3026     // input number has the "sign bit" set, then we loaded it incorrectly as a
3027     // negative 64-bit number.  In this case, add an offset value.
3028     if (SrcTy == Type::ULongTy) {
3029       // Emit a test instruction to see if the dynamic input value was signed.
3030       BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);
3031
3032       // If the sign bit is set, get a pointer to an offset, otherwise get a
3033       // pointer to a zero.
3034       MachineConstantPool *CP = F->getConstantPool();
3035       unsigned Zero = makeAnotherReg(Type::IntTy);
3036       Constant *Null = Constant::getNullValue(Type::UIntTy);
3037       addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero),
3038                                CP->getConstantPoolIndex(Null));
3039       unsigned Offset = makeAnotherReg(Type::IntTy);
3040       Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
3041
3042       addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Offset),
3043                                CP->getConstantPoolIndex(OffsetCst));
3044       unsigned Addr = makeAnotherReg(Type::IntTy);
3045       BuildMI(*BB, IP, X86::CMOVS32rr, 2, Addr).addReg(Zero).addReg(Offset);
3046
3047       // Load the constant for an add.  FIXME: this could make an 'fadd' that
3048       // reads directly from memory, but we don't support these yet.
3049       unsigned ConstReg = makeAnotherReg(Type::DoubleTy);
3050       addDirectMem(BuildMI(*BB, IP, X86::FLD32m, 4, ConstReg), Addr);
3051
3052       BuildMI(*BB, IP, X86::FpADD, 2, RealDestReg)
3053                 .addReg(ConstReg).addReg(DestReg);
3054     }
3055
3056     return;
3057   }
3058
3059   // Handle casts from floating point to integer now...
3060   if (SrcClass == cFP) {
3061     // Change the floating point control register to use "round towards zero"
3062     // mode when truncating to an integer value.
3063     //
3064     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
3065     addFrameReference(BuildMI(*BB, IP, X86::FNSTCW16m, 4), CWFrameIdx);
3066
3067     // Load the old value of the high byte of the control word...
3068     unsigned HighPartOfCW = makeAnotherReg(Type::UByteTy);
3069     addFrameReference(BuildMI(*BB, IP, X86::MOV8rm, 4, HighPartOfCW),
3070                       CWFrameIdx, 1);
3071
3072     // Set the high part to be round to zero...
3073     addFrameReference(BuildMI(*BB, IP, X86::MOV8mi, 5),
3074                       CWFrameIdx, 1).addImm(12);
3075
3076     // Reload the modified control word now...
3077     addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
3078
3079     // Restore the memory image of control word to original value
3080     addFrameReference(BuildMI(*BB, IP, X86::MOV8mr, 5),
3081                       CWFrameIdx, 1).addReg(HighPartOfCW);
3082
3083     // We don't have the facilities for directly storing byte sized data to
3084     // memory.  Promote it to 16 bits.  We also must promote unsigned values to
3085     // larger classes because we only have signed FP stores.
3086     unsigned StoreClass  = DestClass;
3087     const Type *StoreTy  = DestTy;
3088     if (StoreClass == cByte || DestTy->isUnsigned())
3089       switch (StoreClass) {
3090       case cByte:  StoreTy = Type::ShortTy; StoreClass = cShort; break;
3091       case cShort: StoreTy = Type::IntTy;   StoreClass = cInt;   break;
3092       case cInt:   StoreTy = Type::LongTy;  StoreClass = cLong;  break;
3093       // The following treatment of cLong may not be perfectly right,
3094       // but it survives chains of casts of the form
3095       // double->ulong->double.
3096       case cLong:  StoreTy = Type::LongTy;  StoreClass = cLong;  break;
3097       default: assert(0 && "Unknown store class!");
3098       }
3099
3100     // Spill the integer to memory and reload it from there...
3101     int FrameIdx =
3102       F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData());
3103
3104     static const unsigned Op1[] =
3105       { 0, X86::FIST16m, X86::FIST32m, 0, X86::FISTP64m };
3106     addFrameReference(BuildMI(*BB, IP, Op1[StoreClass], 5),
3107                       FrameIdx).addReg(SrcReg);
3108
3109     if (DestClass == cLong) {
3110       addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg), FrameIdx);
3111       addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg+1),
3112                         FrameIdx, 4);
3113     } else {
3114       static const unsigned Op2[] = { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm };
3115       addFrameReference(BuildMI(*BB, IP, Op2[DestClass], 4, DestReg), FrameIdx);
3116     }
3117
3118     // Reload the original control word now...
3119     addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
3120     return;
3121   }
3122
3123   // Anything we haven't handled already, we can't (yet) handle at all.
3124   assert(0 && "Unhandled cast instruction!");
3125   abort();
3126 }
3127
3128 /// visitVANextInst - Implement the va_next instruction...
3129 ///
3130 void ISel::visitVANextInst(VANextInst &I) {
3131   unsigned VAList = getReg(I.getOperand(0));
3132   unsigned DestReg = getReg(I);
3133
3134   unsigned Size;
3135   switch (I.getArgType()->getPrimitiveID()) {
3136   default:
3137     std::cerr << I;
3138     assert(0 && "Error: bad type for va_next instruction!");
3139     return;
3140   case Type::PointerTyID:
3141   case Type::UIntTyID:
3142   case Type::IntTyID:
3143     Size = 4;
3144     break;
3145   case Type::ULongTyID:
3146   case Type::LongTyID:
3147   case Type::DoubleTyID:
3148     Size = 8;
3149     break;
3150   }
3151
3152   // Increment the VAList pointer...
3153   BuildMI(BB, X86::ADD32ri, 2, DestReg).addReg(VAList).addImm(Size);
3154 }
3155
3156 void ISel::visitVAArgInst(VAArgInst &I) {
3157   unsigned VAList = getReg(I.getOperand(0));
3158   unsigned DestReg = getReg(I);
3159
3160   switch (I.getType()->getPrimitiveID()) {
3161   default:
3162     std::cerr << I;
3163     assert(0 && "Error: bad type for va_next instruction!");
3164     return;
3165   case Type::PointerTyID:
3166   case Type::UIntTyID:
3167   case Type::IntTyID:
3168     addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
3169     break;
3170   case Type::ULongTyID:
3171   case Type::LongTyID:
3172     addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
3173     addRegOffset(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), VAList, 4);
3174     break;
3175   case Type::DoubleTyID:
3176     addDirectMem(BuildMI(BB, X86::FLD64m, 4, DestReg), VAList);
3177     break;
3178   }
3179 }
3180
3181 /// visitGetElementPtrInst - instruction-select GEP instructions
3182 ///
3183 void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
3184   // If this GEP instruction will be folded into all of its users, we don't need
3185   // to explicitly calculate it!
3186   unsigned A, B, C, D;
3187   if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), A,B,C,D)) {
3188     // Check all of the users of the instruction to see if they are loads and
3189     // stores.
3190     bool AllWillFold = true;
3191     for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
3192       if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
3193         if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
3194             cast<Instruction>(*UI)->getOperand(0) == &I) {
3195           AllWillFold = false;
3196           break;
3197         }
3198
3199     // If the instruction is foldable, and will be folded into all users, don't
3200     // emit it!
3201     if (AllWillFold) return;
3202   }
3203
3204   unsigned outputReg = getReg(I);
3205   emitGEPOperation(BB, BB->end(), I.getOperand(0),
3206                    I.op_begin()+1, I.op_end(), outputReg);
3207 }
3208
3209 /// getGEPIndex - Inspect the getelementptr operands specified with GEPOps and
3210 /// GEPTypes (the derived types being stepped through at each level).  On return
3211 /// from this function, if some indexes of the instruction are representable as
3212 /// an X86 lea instruction, the machine operands are put into the Ops
3213 /// instruction and the consumed indexes are poped from the GEPOps/GEPTypes
3214 /// lists.  Otherwise, GEPOps.size() is returned.  If this returns a an
3215 /// addressing mode that only partially consumes the input, the BaseReg input of
3216 /// the addressing mode must be left free.
3217 ///
3218 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
3219 ///
3220 void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
3221                        std::vector<Value*> &GEPOps,
3222                        std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
3223                        unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
3224   const TargetData &TD = TM.getTargetData();
3225
3226   // Clear out the state we are working with...
3227   BaseReg = 0;    // No base register
3228   Scale = 1;      // Unit scale
3229   IndexReg = 0;   // No index register
3230   Disp = 0;       // No displacement
3231
3232   // While there are GEP indexes that can be folded into the current address,
3233   // keep processing them.
3234   while (!GEPTypes.empty()) {
3235     if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
3236       // It's a struct access.  CUI is the index into the structure,
3237       // which names the field. This index must have unsigned type.
3238       const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
3239
3240       // Use the TargetData structure to pick out what the layout of the
3241       // structure is in memory.  Since the structure index must be constant, we
3242       // can get its value and use it to find the right byte offset from the
3243       // StructLayout class's list of structure member offsets.
3244       Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
3245       GEPOps.pop_back();        // Consume a GEP operand
3246       GEPTypes.pop_back();
3247     } else {
3248       // It's an array or pointer access: [ArraySize x ElementType].
3249       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
3250       Value *idx = GEPOps.back();
3251
3252       // idx is the index into the array.  Unlike with structure
3253       // indices, we may not know its actual value at code-generation
3254       // time.
3255
3256       // If idx is a constant, fold it into the offset.
3257       unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
3258       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
3259         Disp += TypeSize*CSI->getValue();
3260       } else if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(idx)) {
3261         Disp += TypeSize*CUI->getValue();
3262       } else {
3263         // If the index reg is already taken, we can't handle this index.
3264         if (IndexReg) return;
3265
3266         // If this is a size that we can handle, then add the index as
3267         switch (TypeSize) {
3268         case 1: case 2: case 4: case 8:
3269           // These are all acceptable scales on X86.
3270           Scale = TypeSize;
3271           break;
3272         default:
3273           // Otherwise, we can't handle this scale
3274           return;
3275         }
3276
3277         if (CastInst *CI = dyn_cast<CastInst>(idx))
3278           if (CI->getOperand(0)->getType() == Type::IntTy ||
3279               CI->getOperand(0)->getType() == Type::UIntTy)
3280             idx = CI->getOperand(0);
3281
3282         IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
3283       }
3284
3285       GEPOps.pop_back();        // Consume a GEP operand
3286       GEPTypes.pop_back();
3287     }
3288   }
3289
3290   // GEPTypes is empty, which means we have a single operand left.  See if we
3291   // can set it as the base register.
3292   //
3293   // FIXME: When addressing modes are more powerful/correct, we could load
3294   // global addresses directly as 32-bit immediates.
3295   assert(BaseReg == 0);
3296   BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
3297   GEPOps.pop_back();        // Consume the last GEP operand
3298 }
3299
3300
3301 /// isGEPFoldable - Return true if the specified GEP can be completely
3302 /// folded into the addressing mode of a load/store or lea instruction.
3303 bool ISel::isGEPFoldable(MachineBasicBlock *MBB,
3304                          Value *Src, User::op_iterator IdxBegin,
3305                          User::op_iterator IdxEnd, unsigned &BaseReg,
3306                          unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
3307   if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
3308     Src = CPR->getValue();
3309
3310   std::vector<Value*> GEPOps;
3311   GEPOps.resize(IdxEnd-IdxBegin+1);
3312   GEPOps[0] = Src;
3313   std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
3314
3315   std::vector<const Type*> GEPTypes;
3316   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
3317                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
3318
3319   MachineBasicBlock::iterator IP;
3320   if (MBB) IP = MBB->end();
3321   getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
3322
3323   // We can fold it away iff the getGEPIndex call eliminated all operands.
3324   return GEPOps.empty();
3325 }
3326
3327 void ISel::emitGEPOperation(MachineBasicBlock *MBB,
3328                             MachineBasicBlock::iterator IP,
3329                             Value *Src, User::op_iterator IdxBegin,
3330                             User::op_iterator IdxEnd, unsigned TargetReg) {
3331   const TargetData &TD = TM.getTargetData();
3332   if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
3333     Src = CPR->getValue();
3334
3335   std::vector<Value*> GEPOps;
3336   GEPOps.resize(IdxEnd-IdxBegin+1);
3337   GEPOps[0] = Src;
3338   std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
3339
3340   std::vector<const Type*> GEPTypes;
3341   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
3342                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
3343
3344   // Keep emitting instructions until we consume the entire GEP instruction.
3345   while (!GEPOps.empty()) {
3346     unsigned OldSize = GEPOps.size();
3347     unsigned BaseReg, Scale, IndexReg, Disp;
3348     getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
3349
3350     if (GEPOps.size() != OldSize) {
3351       // getGEPIndex consumed some of the input.  Build an LEA instruction here.
3352       unsigned NextTarget = 0;
3353       if (!GEPOps.empty()) {
3354         assert(BaseReg == 0 &&
3355            "getGEPIndex should have left the base register open for chaining!");
3356         NextTarget = BaseReg = makeAnotherReg(Type::UIntTy);
3357       }
3358
3359       if (IndexReg == 0 && Disp == 0)
3360         BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg);
3361       else
3362         addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TargetReg),
3363                        BaseReg, Scale, IndexReg, Disp);
3364       --IP;
3365       TargetReg = NextTarget;
3366     } else if (GEPTypes.empty()) {
3367       // The getGEPIndex operation didn't want to build an LEA.  Check to see if
3368       // all operands are consumed but the base pointer.  If so, just load it
3369       // into the register.
3370       if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps[0])) {
3371         BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(GV);
3372       } else {
3373         unsigned BaseReg = getReg(GEPOps[0], MBB, IP);
3374         BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg);
3375       }
3376       break;                // we are now done
3377
3378     } else {
3379       // It's an array or pointer access: [ArraySize x ElementType].
3380       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
3381       Value *idx = GEPOps.back();
3382       GEPOps.pop_back();        // Consume a GEP operand
3383       GEPTypes.pop_back();
3384
3385       // Many GEP instructions use a [cast (int/uint) to LongTy] as their
3386       // operand on X86.  Handle this case directly now...
3387       if (CastInst *CI = dyn_cast<CastInst>(idx))
3388         if (CI->getOperand(0)->getType() == Type::IntTy ||
3389             CI->getOperand(0)->getType() == Type::UIntTy)
3390           idx = CI->getOperand(0);
3391
3392       // We want to add BaseReg to(idxReg * sizeof ElementType). First, we
3393       // must find the size of the pointed-to type (Not coincidentally, the next
3394       // type is the type of the elements in the array).
3395       const Type *ElTy = SqTy->getElementType();
3396       unsigned elementSize = TD.getTypeSize(ElTy);
3397
3398       // If idxReg is a constant, we don't need to perform the multiply!
3399       if (ConstantInt *CSI = dyn_cast<ConstantInt>(idx)) {
3400         if (!CSI->isNullValue()) {
3401           unsigned Offset = elementSize*CSI->getRawValue();
3402           unsigned Reg = makeAnotherReg(Type::UIntTy);
3403           BuildMI(*MBB, IP, X86::ADD32ri, 2, TargetReg)
3404                                 .addReg(Reg).addImm(Offset);
3405           --IP;            // Insert the next instruction before this one.
3406           TargetReg = Reg; // Codegen the rest of the GEP into this
3407         }
3408       } else if (elementSize == 1) {
3409         // If the element size is 1, we don't have to multiply, just add
3410         unsigned idxReg = getReg(idx, MBB, IP);
3411         unsigned Reg = makeAnotherReg(Type::UIntTy);
3412         BuildMI(*MBB, IP, X86::ADD32rr, 2,TargetReg).addReg(Reg).addReg(idxReg);
3413         --IP;            // Insert the next instruction before this one.
3414         TargetReg = Reg; // Codegen the rest of the GEP into this
3415       } else {
3416         unsigned idxReg = getReg(idx, MBB, IP);
3417         unsigned OffsetReg = makeAnotherReg(Type::UIntTy);
3418
3419         // Make sure we can back the iterator up to point to the first
3420         // instruction emitted.
3421         MachineBasicBlock::iterator BeforeIt = IP;
3422         if (IP == MBB->begin())
3423           BeforeIt = MBB->end();
3424         else
3425           --BeforeIt;
3426         doMultiplyConst(MBB, IP, OffsetReg, Type::IntTy, idxReg, elementSize);
3427
3428         // Emit an ADD to add OffsetReg to the basePtr.
3429         unsigned Reg = makeAnotherReg(Type::UIntTy);
3430         BuildMI(*MBB, IP, X86::ADD32rr, 2, TargetReg)
3431                           .addReg(Reg).addReg(OffsetReg);
3432
3433         // Step to the first instruction of the multiply.
3434         if (BeforeIt == MBB->end())
3435           IP = MBB->begin();
3436         else
3437           IP = ++BeforeIt;
3438
3439         TargetReg = Reg; // Codegen the rest of the GEP into this
3440       }
3441     }
3442   }
3443 }
3444
3445
3446 /// visitAllocaInst - If this is a fixed size alloca, allocate space from the
3447 /// frame manager, otherwise do it the hard way.
3448 ///
3449 void ISel::visitAllocaInst(AllocaInst &I) {
3450   // Find the data size of the alloca inst's getAllocatedType.
3451   const Type *Ty = I.getAllocatedType();
3452   unsigned TySize = TM.getTargetData().getTypeSize(Ty);
3453
3454   // If this is a fixed size alloca in the entry block for the function,
3455   // statically stack allocate the space.
3456   //
3457   if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(I.getArraySize())) {
3458     if (I.getParent() == I.getParent()->getParent()->begin()) {
3459       TySize *= CUI->getValue();   // Get total allocated size...
3460       unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty);
3461
3462       // Create a new stack object using the frame manager...
3463       int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
3464       addFrameReference(BuildMI(BB, X86::LEA32r, 5, getReg(I)), FrameIdx);
3465       return;
3466     }
3467   }
3468
3469   // Create a register to hold the temporary result of multiplying the type size
3470   // constant by the variable amount.
3471   unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy);
3472   unsigned SrcReg1 = getReg(I.getArraySize());
3473
3474   // TotalSizeReg = mul <numelements>, <TypeSize>
3475   MachineBasicBlock::iterator MBBI = BB->end();
3476   doMultiplyConst(BB, MBBI, TotalSizeReg, Type::UIntTy, SrcReg1, TySize);
3477
3478   // AddedSize = add <TotalSizeReg>, 15
3479   unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy);
3480   BuildMI(BB, X86::ADD32ri, 2, AddedSizeReg).addReg(TotalSizeReg).addImm(15);
3481
3482   // AlignedSize = and <AddedSize>, ~15
3483   unsigned AlignedSize = makeAnotherReg(Type::UIntTy);
3484   BuildMI(BB, X86::AND32ri, 2, AlignedSize).addReg(AddedSizeReg).addImm(~15);
3485
3486   // Subtract size from stack pointer, thereby allocating some space.
3487   BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
3488
3489   // Put a pointer to the space into the result register, by copying
3490   // the stack pointer.
3491   BuildMI(BB, X86::MOV32rr, 1, getReg(I)).addReg(X86::ESP);
3492
3493   // Inform the Frame Information that we have just allocated a variable-sized
3494   // object.
3495   F->getFrameInfo()->CreateVariableSizedObject();
3496 }
3497
3498 /// visitMallocInst - Malloc instructions are code generated into direct calls
3499 /// to the library malloc.
3500 ///
3501 void ISel::visitMallocInst(MallocInst &I) {
3502   unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType());
3503   unsigned Arg;
3504
3505   if (ConstantUInt *C = dyn_cast<ConstantUInt>(I.getOperand(0))) {
3506     Arg = getReg(ConstantUInt::get(Type::UIntTy, C->getValue() * AllocSize));
3507   } else {
3508     Arg = makeAnotherReg(Type::UIntTy);
3509     unsigned Op0Reg = getReg(I.getOperand(0));
3510     MachineBasicBlock::iterator MBBI = BB->end();
3511     doMultiplyConst(BB, MBBI, Arg, Type::UIntTy, Op0Reg, AllocSize);
3512   }
3513
3514   std::vector<ValueRecord> Args;
3515   Args.push_back(ValueRecord(Arg, Type::UIntTy));
3516   MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
3517                                   1).addExternalSymbol("malloc", true);
3518   doCall(ValueRecord(getReg(I), I.getType()), TheCall, Args);
3519 }
3520
3521
3522 /// visitFreeInst - Free instructions are code gen'd to call the free libc
3523 /// function.
3524 ///
3525 void ISel::visitFreeInst(FreeInst &I) {
3526   std::vector<ValueRecord> Args;
3527   Args.push_back(ValueRecord(I.getOperand(0)));
3528   MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
3529                                   1).addExternalSymbol("free", true);
3530   doCall(ValueRecord(0, Type::VoidTy), TheCall, Args);
3531 }
3532
3533 /// createX86SimpleInstructionSelector - This pass converts an LLVM function
3534 /// into a machine code representation is a very simple peep-hole fashion.  The
3535 /// generated code sucks but the implementation is nice and simple.
3536 ///
3537 FunctionPass *llvm::createX86SimpleInstructionSelector(TargetMachine &TM) {
3538   return new ISel(TM);
3539 }