lib/Target/X86/X86ISelSimple.cpp

   1 //===-- X86ISelSimple.cpp - A simple instruction selector for x86 ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by the LLVM research group and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines a simple peephole instruction selector for the x86 target
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "X86.h"
  15 #include "X86InstrBuilder.h"
  16 #include "X86InstrInfo.h"
  17 #include "llvm/Constants.h"
  18 #include "llvm/DerivedTypes.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Instructions.h"
  21 #include "llvm/Pass.h"
  22 #include "llvm/CodeGen/IntrinsicLowering.h"
  23 #include "llvm/CodeGen/MachineConstantPool.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/SSARegMap.h"
  27 #include "llvm/Target/MRegisterInfo.h"
  28 #include "llvm/Target/TargetMachine.h"
  29 #include "llvm/Support/GetElementPtrTypeIterator.h"
  30 #include "llvm/Support/InstVisitor.h"
  31 #include "llvm/ADT/Statistic.h"
  32 using namespace llvm;
  33
  34 namespace {
  35   Statistic<>
  36   NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
  37
  38   /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
  39   /// Representation.
  40   ///
  41   enum TypeClass {
  42     cByte, cShort, cInt, cFP, cLong
  43   };
  44 }
  45
  46 /// getClass - Turn a primitive type into a "class" number which is based on the
  47 /// size of the type, and whether or not it is floating point.
  48 ///
  49 static inline TypeClass getClass(const Type *Ty) {
  50   switch (Ty->getTypeID()) {
  51   case Type::SByteTyID:
  52   case Type::UByteTyID:   return cByte;      // Byte operands are class #0
  53   case Type::ShortTyID:
  54   case Type::UShortTyID:  return cShort;     // Short operands are class #1
  55   case Type::IntTyID:
  56   case Type::UIntTyID:
  57   case Type::PointerTyID: return cInt;       // Int's and pointers are class #2
  58
  59   case Type::FloatTyID:
  60   case Type::DoubleTyID:  return cFP;        // Floating Point is #3
  61
  62   case Type::LongTyID:
  63   case Type::ULongTyID:   return cLong;      // Longs are class #4
  64   default:
  65     assert(0 && "Invalid type to getClass!");
  66     return cByte;  // not reached
  67   }
  68 }
  69
  70 // getClassB - Just like getClass, but treat boolean values as bytes.
  71 static inline TypeClass getClassB(const Type *Ty) {
  72   if (Ty == Type::BoolTy) return cByte;
  73   return getClass(Ty);
  74 }
  75
  76 namespace {
  77   struct X86ISel : public FunctionPass, InstVisitor<X86ISel> {
  78     TargetMachine &TM;
  79     MachineFunction *F;                 // The function we are compiling into
  80     MachineBasicBlock *BB;              // The current MBB we are compiling
  81     int VarArgsFrameIndex;              // FrameIndex for start of varargs area
  82     int ReturnAddressIndex;             // FrameIndex for the return address
  83
  84     std::map<Value*, unsigned> RegMap;  // Mapping between Val's and SSA Regs
  85
  86     // MBBMap - Mapping between LLVM BB -> Machine BB
  87     std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;
  88
  89     // AllocaMap - Mapping from fixed sized alloca instructions to the
  90     // FrameIndex for the alloca.
  91     std::map<AllocaInst*, unsigned> AllocaMap;
  92
  93     X86ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}
  94
  95     /// runOnFunction - Top level implementation of instruction selection for
  96     /// the entire function.
  97     ///
  98     bool runOnFunction(Function &Fn) {
  99       // Lazily create a stack slot for the return address if needed.
 100       ReturnAddressIndex = 0;
 101
 102       // First pass over the function, lower any unknown intrinsic functions
 103       // with the IntrinsicLowering class.
 104       LowerUnknownIntrinsicFunctionCalls(Fn);
 105
 106       F = &MachineFunction::construct(&Fn, TM);
 107
 108       // Create all of the machine basic blocks for the function...
 109       for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
 110         F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I));
 111
 112       BB = &F->front();
 113
 114       // Copy incoming arguments off of the stack...
 115       LoadArgumentsToVirtualRegs(Fn);
 116
 117       // If this is main, emit special code.
 118       if (Fn.hasExternalLinkage() && Fn.getName() == "main")
 119         EmitSpecialCodeForMain();
 120
 121       // Instruction select everything except PHI nodes
 122       visit(Fn);
 123
 124       // Select the PHI nodes
 125       SelectPHINodes();
 126
 127       // Insert the FP_REG_KILL instructions into blocks that need them.
 128       InsertFPRegKills();
 129
 130       RegMap.clear();
 131       MBBMap.clear();
 132       AllocaMap.clear();
 133       F = 0;
 134       // We always build a machine code representation for the function
 135       return true;
 136     }
 137
 138     virtual const char *getPassName() const {
 139       return "X86 Simple Instruction Selection";
 140     }
 141
 142     /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
 143     /// the main function.
 144     void EmitSpecialCodeForMain();
 145
 146     /// visitBasicBlock - This method is called when we are visiting a new basic
 147     /// block.  This simply creates a new MachineBasicBlock to emit code into
 148     /// and adds it to the current MachineFunction.  Subsequent visit* for
 149     /// instructions will be invoked for all instructions in the basic block.
 150     ///
 151     void visitBasicBlock(BasicBlock &LLVM_BB) {
 152       BB = MBBMap[&LLVM_BB];
 153     }
 154
 155     /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
 156     /// function, lowering any calls to unknown intrinsic functions into the
 157     /// equivalent LLVM code.
 158     ///
 159     void LowerUnknownIntrinsicFunctionCalls(Function &F);
 160
 161     /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function
 162     /// from the stack into virtual registers.
 163     ///
 164     void LoadArgumentsToVirtualRegs(Function &F);
 165
 166     /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
 167     /// because we have to generate our sources into the source basic blocks,
 168     /// not the current one.
 169     ///
 170     void SelectPHINodes();
 171
 172     /// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks
 173     /// that need them.  This only occurs due to the floating point stackifier
 174     /// not being aggressive enough to handle arbitrary global stackification.
 175     ///
 176     void InsertFPRegKills();
 177
 178     // Visitation methods for various instructions.  These methods simply emit
 179     // fixed X86 code for each instruction.
 180     //
 181
 182     // Control flow operators
 183     void visitReturnInst(ReturnInst &RI);
 184     void visitBranchInst(BranchInst &BI);
 185     void visitUnreachableInst(UnreachableInst &UI) {}
 186
 187     struct ValueRecord {
 188       Value *Val;
 189       unsigned Reg;
 190       const Type *Ty;
 191       ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {}
 192       ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {}
 193     };
 194     void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
 195                 const std::vector<ValueRecord> &Args);
 196     void visitCallInst(CallInst &I);
 197     void visitIntrinsicCall(Intrinsic::ID ID, CallInst &I);
 198
 199     // Arithmetic operators
 200     void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
 201     void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
 202     void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
 203     void visitMul(BinaryOperator &B);
 204
 205     void visitDiv(BinaryOperator &B) { visitDivRem(B); }
 206     void visitRem(BinaryOperator &B) { visitDivRem(B); }
 207     void visitDivRem(BinaryOperator &B);
 208
 209     // Bitwise operators
 210     void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); }
 211     void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); }
 212     void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); }
 213
 214     // Comparison operators...
 215     void visitSetCondInst(SetCondInst &I);
 216     unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
 217                             MachineBasicBlock *MBB,
 218                             MachineBasicBlock::iterator MBBI);
 219     void visitSelectInst(SelectInst &SI);
 220
 221
 222     // Memory Instructions
 223     void visitLoadInst(LoadInst &I);
 224     void visitStoreInst(StoreInst &I);
 225     void visitGetElementPtrInst(GetElementPtrInst &I);
 226     void visitAllocaInst(AllocaInst &I);
 227     void visitMallocInst(MallocInst &I);
 228     void visitFreeInst(FreeInst &I);
 229
 230     // Other operators
 231     void visitShiftInst(ShiftInst &I);
 232     void visitPHINode(PHINode &I) {}      // PHI nodes handled by second pass
 233     void visitCastInst(CastInst &I);
 234     void visitVANextInst(VANextInst &I);
 235     void visitVAArgInst(VAArgInst &I);
 236
 237     void visitInstruction(Instruction &I) {
 238       std::cerr << "Cannot instruction select: " << I;
 239       abort();
 240     }
 241
 242     /// promote32 - Make a value 32-bits wide, and put it somewhere.
 243     ///
 244     void promote32(unsigned targetReg, const ValueRecord &VR);
 245
 246     /// getAddressingMode - Get the addressing mode to use to address the
 247     /// specified value.  The returned value should be used with addFullAddress.
 248     void getAddressingMode(Value *Addr, X86AddressMode &AM);
 249
 250
 251     /// getGEPIndex - This is used to fold GEP instructions into X86 addressing
 252     /// expressions.
 253     void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
 254                      std::vector<Value*> &GEPOps,
 255                      std::vector<const Type*> &GEPTypes,
 256                      X86AddressMode &AM);
 257
 258     /// isGEPFoldable - Return true if the specified GEP can be completely
 259     /// folded into the addressing mode of a load/store or lea instruction.
 260     bool isGEPFoldable(MachineBasicBlock *MBB,
 261                        Value *Src, User::op_iterator IdxBegin,
 262                        User::op_iterator IdxEnd, X86AddressMode &AM);
 263
 264     /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
 265     /// constant expression GEP support.
 266     ///
 267     void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
 268                           Value *Src, User::op_iterator IdxBegin,
 269                           User::op_iterator IdxEnd, unsigned TargetReg);
 270
 271     /// emitCastOperation - Common code shared between visitCastInst and
 272     /// constant expression cast support.
 273     ///
 274     void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP,
 275                            Value *Src, const Type *DestTy, unsigned TargetReg);
 276
 277     /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
 278     /// and constant expression support.
 279     ///
 280     void emitSimpleBinaryOperation(MachineBasicBlock *BB,
 281                                    MachineBasicBlock::iterator IP,
 282                                    Value *Op0, Value *Op1,
 283                                    unsigned OperatorClass, unsigned TargetReg);
 284
 285     /// emitBinaryFPOperation - This method handles emission of floating point
 286     /// Add (0), Sub (1), Mul (2), and Div (3) operations.
 287     void emitBinaryFPOperation(MachineBasicBlock *BB,
 288                                MachineBasicBlock::iterator IP,
 289                                Value *Op0, Value *Op1,
 290                                unsigned OperatorClass, unsigned TargetReg);
 291
 292     void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
 293                       Value *Op0, Value *Op1, unsigned TargetReg);
 294
 295     void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
 296                     unsigned DestReg, const Type *DestTy,
 297                     unsigned Op0Reg, unsigned Op1Reg);
 298     void doMultiplyConst(MachineBasicBlock *MBB,
 299                          MachineBasicBlock::iterator MBBI,
 300                          unsigned DestReg, const Type *DestTy,
 301                          unsigned Op0Reg, unsigned Op1Val);
 302
 303     void emitDivRemOperation(MachineBasicBlock *BB,
 304                              MachineBasicBlock::iterator IP,
 305                              Value *Op0, Value *Op1, bool isDiv,
 306                              unsigned TargetReg);
 307
 308     /// emitSetCCOperation - Common code shared between visitSetCondInst and
 309     /// constant expression support.
 310     ///
 311     void emitSetCCOperation(MachineBasicBlock *BB,
 312                             MachineBasicBlock::iterator IP,
 313                             Value *Op0, Value *Op1, unsigned Opcode,
 314                             unsigned TargetReg);
 315
 316     /// emitShiftOperation - Common code shared between visitShiftInst and
 317     /// constant expression support.
 318     ///
 319     void emitShiftOperation(MachineBasicBlock *MBB,
 320                             MachineBasicBlock::iterator IP,
 321                             Value *Op, Value *ShiftAmount, bool isLeftShift,
 322                             const Type *ResultTy, unsigned DestReg);
 323
 324     // Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
 325     // constant.
 326     void doSHLDConst(MachineBasicBlock *MBB,
 327                      MachineBasicBlock::iterator MBBI,
 328                      unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
 329                      unsigned Op1Val);
 330
 331     /// emitSelectOperation - Common code shared between visitSelectInst and the
 332     /// constant expression support.
 333     void emitSelectOperation(MachineBasicBlock *MBB,
 334                              MachineBasicBlock::iterator IP,
 335                              Value *Cond, Value *TrueVal, Value *FalseVal,
 336                              unsigned DestReg);
 337
 338     /// copyConstantToRegister - Output the instructions required to put the
 339     /// specified constant into the specified register.
 340     ///
 341     void copyConstantToRegister(MachineBasicBlock *MBB,
 342                                 MachineBasicBlock::iterator MBBI,
 343                                 Constant *C, unsigned Reg);
 344
 345     void emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
 346                    unsigned LHS, unsigned RHS);
 347
 348     /// makeAnotherReg - This method returns the next register number we haven't
 349     /// yet used.
 350     ///
 351     /// Long values are handled somewhat specially.  They are always allocated
 352     /// as pairs of 32 bit integer values.  The register number returned is the
 353     /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits
 354     /// of the long value.
 355     ///
 356     unsigned makeAnotherReg(const Type *Ty) {
 357       assert(dynamic_cast<const X86RegisterInfo*>(TM.getRegisterInfo()) &&
 358              "Current target doesn't have X86 reg info??");
 359       const X86RegisterInfo *MRI =
 360         static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
 361       if (Ty == Type::LongTy || Ty == Type::ULongTy) {
 362         const TargetRegisterClass *RC = MRI->getRegClassForType(Type::IntTy);
 363         // Create the lower part
 364         F->getSSARegMap()->createVirtualRegister(RC);
 365         // Create the upper part.
 366         return F->getSSARegMap()->createVirtualRegister(RC)-1;
 367       }
 368
 369       // Add the mapping of regnumber => reg class to MachineFunction
 370       const TargetRegisterClass *RC = MRI->getRegClassForType(Ty);
 371       return F->getSSARegMap()->createVirtualRegister(RC);
 372     }
 373
 374     /// getReg - This method turns an LLVM value into a register number.
 375     ///
 376     unsigned getReg(Value &V) { return getReg(&V); }  // Allow references
 377     unsigned getReg(Value *V) {
 378       // Just append to the end of the current bb.
 379       MachineBasicBlock::iterator It = BB->end();
 380       return getReg(V, BB, It);
 381     }
 382     unsigned getReg(Value *V, MachineBasicBlock *MBB,
 383                     MachineBasicBlock::iterator IPt);
 384
 385     /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
 386     /// that is to be statically allocated with the initial stack frame
 387     /// adjustment.
 388     unsigned getFixedSizedAllocaFI(AllocaInst *AI);
 389   };
 390 }
 391
 392 /// dyn_castFixedAlloca - If the specified value is a fixed size alloca
 393 /// instruction in the entry block, return it.  Otherwise, return a null
 394 /// pointer.
 395 static AllocaInst *dyn_castFixedAlloca(Value *V) {
 396   if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
 397     BasicBlock *BB = AI->getParent();
 398     if (isa<ConstantUInt>(AI->getArraySize()) && BB ==&BB->getParent()->front())
 399       return AI;
 400   }
 401   return 0;
 402 }
 403
 404 /// getReg - This method turns an LLVM value into a register number.
 405 ///
 406 unsigned X86ISel::getReg(Value *V, MachineBasicBlock *MBB,
 407                          MachineBasicBlock::iterator IPt) {
 408   // If this operand is a constant, emit the code to copy the constant into
 409   // the register here...
 410   if (Constant *C = dyn_cast<Constant>(V)) {
 411     unsigned Reg = makeAnotherReg(V->getType());
 412     copyConstantToRegister(MBB, IPt, C, Reg);
 413     return Reg;
 414   } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
 415     // Do not emit noop casts at all, unless it's a double -> float cast.
 416     if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()) &&
 417         (CI->getType() != Type::FloatTy ||
 418          CI->getOperand(0)->getType() != Type::DoubleTy))
 419       return getReg(CI->getOperand(0), MBB, IPt);
 420   } else if (AllocaInst *AI = dyn_castFixedAlloca(V)) {
 421     // If the alloca address couldn't be folded into the instruction addressing,
 422     // emit an explicit LEA as appropriate.
 423     unsigned Reg = makeAnotherReg(V->getType());
 424     unsigned FI = getFixedSizedAllocaFI(AI);
 425     addFrameReference(BuildMI(*MBB, IPt, X86::LEA32r, 4, Reg), FI);
 426     return Reg;
 427   }
 428
 429   unsigned &Reg = RegMap[V];
 430   if (Reg == 0) {
 431     Reg = makeAnotherReg(V->getType());
 432     RegMap[V] = Reg;
 433   }
 434
 435   return Reg;
 436 }
 437
 438 /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
 439 /// that is to be statically allocated with the initial stack frame
 440 /// adjustment.
 441 unsigned X86ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
 442   // Already computed this?
 443   std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI);
 444   if (I != AllocaMap.end() && I->first == AI) return I->second;
 445
 446   const Type *Ty = AI->getAllocatedType();
 447   ConstantUInt *CUI = cast<ConstantUInt>(AI->getArraySize());
 448   unsigned TySize = TM.getTargetData().getTypeSize(Ty);
 449   TySize *= CUI->getValue();   // Get total allocated size...
 450   unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty);
 451
 452   // Create a new stack object using the frame manager...
 453   int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
 454   AllocaMap.insert(I, std::make_pair(AI, FrameIdx));
 455   return FrameIdx;
 456 }
 457
 458
 459 /// copyConstantToRegister - Output the instructions required to put the
 460 /// specified constant into the specified register.
 461 ///
 462 void X86ISel::copyConstantToRegister(MachineBasicBlock *MBB,
 463                                      MachineBasicBlock::iterator IP,
 464                                      Constant *C, unsigned R) {
 465   if (isa<UndefValue>(C)) {
 466     switch (getClassB(C->getType())) {
 467     case cFP:
 468       // FIXME: SHOULD TEACH STACKIFIER ABOUT UNDEF VALUES!
 469       BuildMI(*MBB, IP, X86::FLD0, 0, R);
 470       return;
 471     case cLong:
 472       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R+1);
 473       // FALL THROUGH
 474     default:
 475       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R);
 476       return;
 477     }
 478   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
 479     unsigned Class = 0;
 480     switch (CE->getOpcode()) {
 481     case Instruction::GetElementPtr:
 482       emitGEPOperation(MBB, IP, CE->getOperand(0),
 483                        CE->op_begin()+1, CE->op_end(), R);
 484       return;
 485     case Instruction::Cast:
 486       emitCastOperation(MBB, IP, CE->getOperand(0), CE->getType(), R);
 487       return;
 488
 489     case Instruction::Xor: ++Class; // FALL THROUGH
 490     case Instruction::Or:  ++Class; // FALL THROUGH
 491     case Instruction::And: ++Class; // FALL THROUGH
 492     case Instruction::Sub: ++Class; // FALL THROUGH
 493     case Instruction::Add:
 494       emitSimpleBinaryOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 495                                 Class, R);
 496       return;
 497
 498     case Instruction::Mul:
 499       emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R);
 500       return;
 501
 502     case Instruction::Div:
 503     case Instruction::Rem:
 504       emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 505                           CE->getOpcode() == Instruction::Div, R);
 506       return;
 507
 508     case Instruction::SetNE:
 509     case Instruction::SetEQ:
 510     case Instruction::SetLT:
 511     case Instruction::SetGT:
 512     case Instruction::SetLE:
 513     case Instruction::SetGE:
 514       emitSetCCOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 515                          CE->getOpcode(), R);
 516       return;
 517
 518     case Instruction::Shl:
 519     case Instruction::Shr:
 520       emitShiftOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 521                          CE->getOpcode() == Instruction::Shl, CE->getType(), R);
 522       return;
 523
 524     case Instruction::Select:
 525       emitSelectOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
 526                           CE->getOperand(2), R);
 527       return;
 528
 529     default:
 530       std::cerr << "Offending expr: " << *C << "\n";
 531       assert(0 && "Constant expression not yet handled!\n");
 532     }
 533   }
 534
 535   if (C->getType()->isIntegral()) {
 536     unsigned Class = getClassB(C->getType());
 537
 538     if (Class == cLong) {
 539       // Copy the value into the register pair.
 540       uint64_t Val = cast<ConstantInt>(C)->getRawValue();
 541       BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(Val & 0xFFFFFFFF);
 542       BuildMI(*MBB, IP, X86::MOV32ri, 1, R+1).addImm(Val >> 32);
 543       return;
 544     }
 545
 546     assert(Class <= cInt && "Type not handled yet!");
 547
 548     static const unsigned IntegralOpcodeTab[] = {
 549       X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
 550     };
 551
 552     if (C->getType() == Type::BoolTy) {
 553       BuildMI(*MBB, IP, X86::MOV8ri, 1, R).addImm(C == ConstantBool::True);
 554     } else {
 555       ConstantInt *CI = cast<ConstantInt>(C);
 556       BuildMI(*MBB, IP, IntegralOpcodeTab[Class],1,R).addImm(CI->getRawValue());
 557     }
 558   } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
 559     if (CFP->isExactlyValue(+0.0))
 560       BuildMI(*MBB, IP, X86::FLD0, 0, R);
 561     else if (CFP->isExactlyValue(+1.0))
 562       BuildMI(*MBB, IP, X86::FLD1, 0, R);
 563     else {
 564       // Otherwise we need to spill the constant to memory...
 565       MachineConstantPool *CP = F->getConstantPool();
 566       unsigned CPI = CP->getConstantPoolIndex(CFP);
 567       const Type *Ty = CFP->getType();
 568
 569       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
 570       unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m;
 571       addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI);
 572     }
 573
 574   } else if (isa<ConstantPointerNull>(C)) {
 575     // Copy zero (null pointer) to the register.
 576     BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(0);
 577   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
 578     BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addGlobalAddress(GV);
 579   } else {
 580     std::cerr << "Offending constant: " << *C << "\n";
 581     assert(0 && "Type not handled yet!");
 582   }
 583 }
 584
 585 /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from
 586 /// the stack into virtual registers.
 587 ///
 588 void X86ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
 589   // Emit instructions to load the arguments...  On entry to a function on the
 590   // X86, the stack frame looks like this:
 591   //
 592   // [ESP] -- return address
 593   // [ESP + 4] -- first argument (leftmost lexically)
 594   // [ESP + 8] -- second argument, if first argument is four bytes in size
 595   //    ...
 596   //
 597   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
 598   MachineFrameInfo *MFI = F->getFrameInfo();
 599
 600   for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
 601     bool ArgLive = !I->use_empty();
 602     unsigned Reg = ArgLive ? getReg(*I) : 0;
 603     int FI;          // Frame object index
 604
 605     switch (getClassB(I->getType())) {
 606     case cByte:
 607       if (ArgLive) {
 608         FI = MFI->CreateFixedObject(1, ArgOffset);
 609         addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
 610       }
 611       break;
 612     case cShort:
 613       if (ArgLive) {
 614         FI = MFI->CreateFixedObject(2, ArgOffset);
 615         addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
 616       }
 617       break;
 618     case cInt:
 619       if (ArgLive) {
 620         FI = MFI->CreateFixedObject(4, ArgOffset);
 621         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
 622       }
 623       break;
 624     case cLong:
 625       if (ArgLive) {
 626         FI = MFI->CreateFixedObject(8, ArgOffset);
 627         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
 628         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
 629       }
 630       ArgOffset += 4;   // longs require 4 additional bytes
 631       break;
 632     case cFP:
 633       if (ArgLive) {
 634         unsigned Opcode;
 635         if (I->getType() == Type::FloatTy) {
 636           Opcode = X86::FLD32m;
 637           FI = MFI->CreateFixedObject(4, ArgOffset);
 638         } else {
 639           Opcode = X86::FLD64m;
 640           FI = MFI->CreateFixedObject(8, ArgOffset);
 641         }
 642         addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
 643       }
 644       if (I->getType() == Type::DoubleTy)
 645         ArgOffset += 4;   // doubles require 4 additional bytes
 646       break;
 647     default:
 648       assert(0 && "Unhandled argument type!");
 649     }
 650     ArgOffset += 4;  // Each argument takes at least 4 bytes on the stack...
 651   }
 652
 653   // If the function takes variable number of arguments, add a frame offset for
 654   // the start of the first vararg value... this is used to expand
 655   // llvm.va_start.
 656   if (Fn.getFunctionType()->isVarArg())
 657     VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
 658 }
 659
 660 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
 661 /// the main function.
 662 void X86ISel::EmitSpecialCodeForMain() {
 663   // Switch the FPU to 64-bit precision mode for better compatibility and speed.
 664   int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
 665   addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
 666
 667   // Set the high part to be 64-bit precision.
 668   addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
 669                     CWFrameIdx, 1).addImm(2);
 670
 671   // Reload the modified control word now.
 672   addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
 673 }
 674
 675 /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
 676 /// because we have to generate our sources into the source basic blocks, not
 677 /// the current one.
 678 ///
 679 void X86ISel::SelectPHINodes() {
 680   const TargetInstrInfo &TII = *TM.getInstrInfo();
 681   const Function &LF = *F->getFunction();  // The LLVM function...
 682   for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
 683     const BasicBlock *BB = I;
 684     MachineBasicBlock &MBB = *MBBMap[I];
 685
 686     // Loop over all of the PHI nodes in the LLVM basic block...
 687     MachineBasicBlock::iterator PHIInsertPoint = MBB.begin();
 688     for (BasicBlock::const_iterator I = BB->begin(); isa<PHINode>(I); ++I) {
 689       PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I));
 690
 691       // Create a new machine instr PHI node, and insert it.
 692       unsigned PHIReg = getReg(*PN);
 693       MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint,
 694                                     X86::PHI, PN->getNumOperands(), PHIReg);
 695
 696       MachineInstr *LongPhiMI = 0;
 697       if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy)
 698         LongPhiMI = BuildMI(MBB, PHIInsertPoint,
 699                             X86::PHI, PN->getNumOperands(), PHIReg+1);
 700
 701       // PHIValues - Map of blocks to incoming virtual registers.  We use this
 702       // so that we only initialize one incoming value for a particular block,
 703       // even if the block has multiple entries in the PHI node.
 704       //
 705       std::map<MachineBasicBlock*, unsigned> PHIValues;
 706
 707       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
 708         MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)];
 709         unsigned ValReg;
 710         std::map<MachineBasicBlock*, unsigned>::iterator EntryIt =
 711           PHIValues.lower_bound(PredMBB);
 712
 713         if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) {
 714           // We already inserted an initialization of the register for this
 715           // predecessor.  Recycle it.
 716           ValReg = EntryIt->second;
 717
 718         } else {
 719           // Get the incoming value into a virtual register.
 720           //
 721           Value *Val = PN->getIncomingValue(i);
 722
 723           // If this is a constant or GlobalValue, we may have to insert code
 724           // into the basic block to compute it into a virtual register.
 725           if ((isa<Constant>(Val) && !isa<ConstantExpr>(Val))) {
 726             // Simple constants get emitted at the end of the basic block,
 727             // before any terminator instructions.  We "know" that the code to
 728             // move a constant into a register will never clobber any flags.
 729             ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator());
 730           } else {
 731             // Because we don't want to clobber any values which might be in
 732             // physical registers with the computation of this constant (which
 733             // might be arbitrarily complex if it is a constant expression),
 734             // just insert the computation at the top of the basic block.
 735             MachineBasicBlock::iterator PI = PredMBB->begin();
 736
 737             // Skip over any PHI nodes though!
 738             while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI)
 739               ++PI;
 740
 741             ValReg = getReg(Val, PredMBB, PI);
 742           }
 743
 744           // Remember that we inserted a value for this PHI for this predecessor
 745           PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg));
 746         }
 747
 748         PhiMI->addRegOperand(ValReg);
 749         PhiMI->addMachineBasicBlockOperand(PredMBB);
 750         if (LongPhiMI) {
 751           LongPhiMI->addRegOperand(ValReg+1);
 752           LongPhiMI->addMachineBasicBlockOperand(PredMBB);
 753         }
 754       }
 755
 756       // Now that we emitted all of the incoming values for the PHI node, make
 757       // sure to reposition the InsertPoint after the PHI that we just added.
 758       // This is needed because we might have inserted a constant into this
 759       // block, right after the PHI's which is before the old insert point!
 760       PHIInsertPoint = LongPhiMI ? LongPhiMI : PhiMI;
 761       ++PHIInsertPoint;
 762     }
 763   }
 764 }
 765
 766 /// RequiresFPRegKill - The floating point stackifier pass cannot insert
 767 /// compensation code on critical edges.  As such, it requires that we kill all
 768 /// FP registers on the exit from any blocks that either ARE critical edges, or
 769 /// branch to a block that has incoming critical edges.
 770 ///
 771 /// Note that this kill instruction will eventually be eliminated when
 772 /// restrictions in the stackifier are relaxed.
 773 ///
     /// The critical edge analysis below is currently compiled out (#if 0), so
     /// this function conservatively answers "true" for every block.
     ///
 774 static bool RequiresFPRegKill(const MachineBasicBlock *MBB) {
 775 #if 0
 776   const BasicBlock *BB = MBB->getBasicBlock ();
 777   for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
 778     const BasicBlock *Succ = *SI;
 779     pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
 780     ++PI;  // The block has at least one predecessor (BB itself).
 781     if (PI != PE) {             // If it has exactly one, this isn't crit edge
 782       // If this block has more than one predecessor, check all of the
 783       // predecessors to see if they have multiple successors.  If so, then the
 784       // block we are analyzing needs an FPRegKill.
 785       for (PI = pred_begin(Succ); PI != PE; ++PI) {
 786         const BasicBlock *Pred = *PI;
 787         succ_const_iterator SI2 = succ_begin(Pred);
 788         ++SI2;  // There must be at least one successor of this block.
 789         if (SI2 != succ_end(Pred))
 790           return true;   // Yes, we must insert the kill on this edge.
 791       }
 792     }
 793   }
 794   // If we got this far, there is no need to insert the kill instruction.
 795   return false;
 796 #else
     // Analysis disabled: always request the kill.
 797   return true;
 798 #endif
 799 }
 800
 801 // InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks that
 802 // need them.  This only occurs due to the floating point stackifier not being
 803 // aggressive enough to handle arbitrary global stackification.
 804 //
 805 // Currently we insert an FP_REG_KILL instruction into each block that uses or
 806 // defines a floating point virtual register.
 807 //
 808 // When the global register allocators (like linear scan) finally update live
 809 // variable analysis, we can keep floating point values in registers across
 810 // portions of the CFG that do not involve critical edges.  This will be a big
 811 // win, but we are waiting on the global allocators before we can do this.
 812 //
 813 // With a bit of work, the floating point stackifier pass can be enhanced to
 814 // break critical edges as needed (to make a place to put compensation code),
 815 // but this will require some infrastructure improvements as well.
 816 //
 817 void X86ISel::InsertFPRegKills() {
 818   SSARegMap &RegMap = *F->getSSARegMap();
 819
 820   for (MachineFunction::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
 821     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
 822       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
 823       MachineOperand& MO = I->getOperand(i);
 824         if (MO.isRegister() && MO.getReg()) {
 825           unsigned Reg = MO.getReg();
 826           if (MRegisterInfo::isVirtualRegister(Reg)) {
 827             unsigned RegSize = RegMap.getRegClass(Reg)->getSize();
 828             if (RegSize == 10 || RegSize == 8)
 829               goto UsesFPReg;
 830           }
 831         }
 832       }
 833     // If we haven't found an FP register use or def in this basic block, check
 834     // to see if any of our successors has an FP PHI node, which will cause a
 835     // copy to be inserted into this block.
 836     for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
 837          SE = BB->succ_end(); SI != SE; ++SI) {
 838       MachineBasicBlock *SBB = *SI;
 839       for (MachineBasicBlock::iterator I = SBB->begin();
 840            I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
 841         const TargetRegisterClass *RC =
 842           RegMap.getRegClass(I->getOperand(0).getReg());
 843         if (RC->getSize() == 10 || RC->getSize() == 8)
 844           goto UsesFPReg;
 845       }
 846     }
 847     continue;
 848   UsesFPReg:
 849     // Okay, this block uses an FP register.  If the block has successors (ie,
 850     // it's not an unwind/return), insert the FP_REG_KILL instruction.
 851     if (BB->succ_size () && RequiresFPRegKill(BB)) {
 852       BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
 853       ++NumFPKill;
 854     }
 855   }
 856 }
 857
 858
 859 void X86ISel::getAddressingMode(Value *Addr, X86AddressMode &AM) {
 860   AM.BaseType = X86AddressMode::RegBase;
 861   AM.Base.Reg = 0; AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0;
 862   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
 863     if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
 864                        AM))
 865       return;
 866   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
 867     if (CE->getOpcode() == Instruction::GetElementPtr)
 868       if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
 869                         AM))
 870         return;
 871   } else if (AllocaInst *AI = dyn_castFixedAlloca(Addr)) {
 872     AM.BaseType = X86AddressMode::FrameIndexBase;
 873     AM.Base.FrameIndex = getFixedSizedAllocaFI(AI);
 874     return;
 875   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
 876     AM.GV = GV;
 877     return;
 878   }
 879
 880   // If it's not foldable, reset addr mode.
 881   AM.BaseType = X86AddressMode::RegBase;
 882   AM.Base.Reg = getReg(Addr);
 883   AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0;
 884 }
 885
 886 // canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold
 887 // it into the conditional branch or select instruction which is the only user
 888 // of the cc instruction.  This is the case if the conditional branch is the
 889 // only user of the setcc.  We also don't handle long arguments below, so we
 890 // reject them here as well.
 891 //
 892 static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
 893   if (SetCondInst *SCI = dyn_cast<SetCondInst>(V))
 894     if (SCI->hasOneUse()) {
 895       Instruction *User = cast<Instruction>(SCI->use_back());
 896       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
 897           (getClassB(SCI->getOperand(0)->getType()) != cLong ||
 898            SCI->getOpcode() == Instruction::SetEQ ||
 899            SCI->getOpcode() == Instruction::SetNE) &&
 900           (isa<BranchInst>(User) || User->getOperand(0) == V))
 901         return SCI;
 902     }
 903   return 0;
 904 }
 905
 906 // Return a fixed numbering for setcc instructions which does not depend on the
 907 // order of the opcodes.
 908 //
 909 static unsigned getSetCCNumber(unsigned Opcode) {
 910   switch(Opcode) {
 911   default: assert(0 && "Unknown setcc instruction!");
 912   case Instruction::SetEQ: return 0;
 913   case Instruction::SetNE: return 1;
 914   case Instruction::SetLT: return 2;
 915   case Instruction::SetGE: return 3;
 916   case Instruction::SetGT: return 4;
 917   case Instruction::SetLE: return 5;
 918   }
 919 }
 920
     // SetCCOpcodeTab - X86 setcc opcode to use for each comparison.
     //
 921 // LLVM  -> X86 signed  X86 unsigned
 922 // -----    ----------  ------------
 923 // seteq -> sete        sete
 924 // setne -> setne       setne
 925 // setlt -> setl        setb
 926 // setge -> setge       setae
 927 // setgt -> setg        seta
 928 // setle -> setle       setbe
 929 // ----
 930 //          sets                       // Used by comparison with 0 optimization
 931 //          setns
     //
     // The first index is the signedness of the comparison (row 0 = unsigned,
     // row 1 = signed; note this is the opposite order of the columns above).
     // The second index is the setcc number: 0-5 as produced by getSetCCNumber,
     // plus 6 (sets) and 7 (setns), which EmitComparison returns for setlt/setge
     // against zero.  Those two entries only exist in the signed row.
 932 static const unsigned SetCCOpcodeTab[2][8] = {
 933   { X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAEr, X86::SETAr, X86::SETBEr,
 934     0, 0 },
 935   { X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGEr, X86::SETGr, X86::SETLEr,
 936     X86::SETSr, X86::SETNSr },
 937 };
 938
 939 /// emitUCOMr - In the future when we support processors before the P6, this
 940 /// wraps the logic for emitting an FUCOMr vs FUCOMIr.
 941 void X86ISel::emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
 942                         unsigned LHS, unsigned RHS) {
 943   if (0) { // for processors prior to the P6
 944     BuildMI(*MBB, IP, X86::FUCOMr, 2).addReg(LHS).addReg(RHS);
 945     BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
 946     BuildMI(*MBB, IP, X86::SAHF, 1);
 947   } else {
 948     BuildMI(*MBB, IP, X86::FUCOMIr, 2).addReg(LHS).addReg(RHS);
 949   }
 950 }
 951
 952 // EmitComparison - This function emits a comparison of the two operands,
 953 // returning the extended setcc code to use.
     //
     // OpNum is the setcc number of the comparison (see getSetCCNumber) and the
     // code is emitted into MBB at IP.  The value returned is OpNum itself, except
     // that comparisons against zero which are implemented with a TEST
     // instruction map setlt (2) to 6 and setge (3) to 7, the sign-flag entries of
     // the opcode tables.
     //
     // For long operands and any comparison other than seteq/setne, the boolean
     // result is not left in the flags but in the BL register.
 954 unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
 955                                  MachineBasicBlock *MBB,
 956                                  MachineBasicBlock::iterator IP) {
 957   // The arguments are already supposed to be of the same type.
 958   const Type *CompTy = Op0->getType();
 959   unsigned Class = getClassB(CompTy);
 960
 961   // Special case handling of: cmp R, i
 962   if (isa<ConstantPointerNull>(Op1)) {
 963     unsigned Op0r = getReg(Op0, MBB, IP);
 964     if (OpNum < 2)    // seteq/setne -> test
 965       BuildMI(*MBB, IP, X86::TEST32rr, 2).addReg(Op0r).addReg(Op0r);
 966     else
 967       BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(0);
 968     return OpNum;
 969
 970   } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
 971     if (Class == cByte || Class == cShort || Class == cInt) {
 972       unsigned Op1v = CI->getRawValue();
 973
 974       // Mask off any upper bits of the constant, if there are any...
     // (8 << Class) is the operand width in bits: 8, 16 or 32.
 975       Op1v &= (1ULL << (8 << Class)) - 1;
 976
 977       // If this is a comparison against zero, emit more efficient code.  We
 978       // can't handle unsigned comparisons against zero unless they are == or
 979       // !=.  These should have been strength reduced already anyway.
 980       if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {
 981
 982         // If this is a comparison against zero and the LHS is an and of a
 983         // register with a constant, use the test to do the and.
 984         if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
 985           if (Op0I->getOpcode() == Instruction::And && Op0->hasOneUse() &&
 986               isa<ConstantInt>(Op0I->getOperand(1))) {
 987             static const unsigned TESTTab[] = {
 988               X86::TEST8ri, X86::TEST16ri, X86::TEST32ri
 989             };
 990
 991             // Emit test X, i
 992             unsigned LHS = getReg(Op0I->getOperand(0), MBB, IP);
 993             unsigned Imm =
 994               cast<ConstantInt>(Op0I->getOperand(1))->getRawValue();
 995             BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(LHS).addImm(Imm);
 996
 997             if (OpNum == 2) return 6;   // Map jl -> js
 998             if (OpNum == 3) return 7;   // Map jge -> jns
 999             return OpNum;
1000           }
1001
     // Otherwise test the register against itself.
1002         unsigned Op0r = getReg(Op0, MBB, IP);
1003         static const unsigned TESTTab[] = {
1004           X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
1005         };
1006         BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r);
1007
1008         if (OpNum == 2) return 6;   // Map jl -> js
1009         if (OpNum == 3) return 7;   // Map jge -> jns
1010         return OpNum;
1011       }
1012
     // General case: compare the register against the (masked) immediate.
1013       static const unsigned CMPTab[] = {
1014         X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
1015       };
1016
1017       unsigned Op0r = getReg(Op0, MBB, IP);
1018       BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
1019       return OpNum;
1020     } else {
     // Long compared against a constant.  The low half lives in Op0r and the
     // high half in Op0r+1.
1021       unsigned Op0r = getReg(Op0, MBB, IP);
1022       assert(Class == cLong && "Unknown integer class!");
1023       unsigned LowCst = CI->getRawValue();
1024       unsigned HiCst = CI->getRawValue() >> 32;
1025       if (OpNum < 2) {    // seteq, setne
     // XOR each half with its constant (skipped when the constant half is
     // zero) and OR the results together, so the flags set by the OR
     // describe the equality.
1026         unsigned LoTmp = Op0r;
1027         if (LowCst != 0) {
1028           LoTmp = makeAnotherReg(Type::IntTy);
1029           BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst);
1030         }
1031         unsigned HiTmp = Op0r+1;
1032         if (HiCst != 0) {
1033           HiTmp = makeAnotherReg(Type::IntTy);
1034           BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
1035         }
1036         unsigned FinalTmp = makeAnotherReg(Type::IntTy);
1037         BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
1038         return OpNum;
1039       } else {
1040         // Emit a sequence of code which compares the high and low parts once
1041         // each, then uses a conditional move to handle the overflow case.  For
1042         // example, a setlt for long would generate code like this:
1043         //
1044         // AL = lo(op1) < lo(op2)   // Always unsigned comparison
1045         // BL = hi(op1) < hi(op2)   // Signedness depends on operands
1046         // dest = hi(op1) == hi(op2) ? AL : BL;
1047         //
1048
1049         // FIXME: This would be much better if we had hierarchical register
1050         // classes!  Until then, hardcode registers so that we can deal with
1051         // their aliases (because we don't have conditional byte moves).
1052         //
1053         BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
1054         BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
1055         BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
1056         BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
1057         BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
1058         BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
1059         BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
1060           .addReg(X86::AX);
1061         // NOTE: visitSetCondInst knows that the value is dumped into the BL
1062         // register at this point for long values...
1063         return OpNum;
1064       }
1065     }
1066   }
1067
1068   unsigned Op0r = getReg(Op0, MBB, IP);
1069
1070   // Special case handling of comparison against +/- 0.0
1071   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
1072     if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
1073       BuildMI(*MBB, IP, X86::FTST, 1).addReg(Op0r);
1074       BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
1075       BuildMI(*MBB, IP, X86::SAHF, 1);
1076       return OpNum;
1077     }
1078
     // General register/register comparison.
1079   unsigned Op1r = getReg(Op1, MBB, IP);
1080   switch (Class) {
1081   default: assert(0 && "Unknown type class!");
1082     // Emit: cmp <var1>, <var2> (do the comparison).  We can
1083     // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
1084     // 32-bit.
1085   case cByte:
1086     BuildMI(*MBB, IP, X86::CMP8rr, 2).addReg(Op0r).addReg(Op1r);
1087     break;
1088   case cShort:
1089     BuildMI(*MBB, IP, X86::CMP16rr, 2).addReg(Op0r).addReg(Op1r);
1090     break;
1091   case cInt:
1092     BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
1093     break;
1094   case cFP:
1095     emitUCOMr(MBB, IP, Op0r, Op1r);
1096     break;
1097
1098   case cLong:
1099     if (OpNum < 2) {    // seteq, setne
1100       unsigned LoTmp = makeAnotherReg(Type::IntTy);
1101       unsigned HiTmp = makeAnotherReg(Type::IntTy);
1102       unsigned FinalTmp = makeAnotherReg(Type::IntTy);
1103       BuildMI(*MBB, IP, X86::XOR32rr, 2, LoTmp).addReg(Op0r).addReg(Op1r);
1104       BuildMI(*MBB, IP, X86::XOR32rr, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1);
1105       BuildMI(*MBB, IP, X86::OR32rr,  2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
1106       break;  // Allow the sete or setne to be generated from flags set by OR
1107     } else {
1108       // Emit a sequence of code which compares the high and low parts once
1109       // each, then uses a conditional move to handle the overflow case.  For
1110       // example, a setlt for long would generate code like this:
1111       //
1112       // AL = lo(op1) < lo(op2)   // Always unsigned comparison
1113       // BL = hi(op1) < hi(op2)   // Signedness depends on operands
1114       // dest = hi(op1) == hi(op2) ? AL : BL;
1115       //
1116
1117       // FIXME: This would be much better if we had hierarchical register
1118       // classes!  Until then, hardcode registers so that we can deal with their
1119       // aliases (because we don't have conditional byte moves).
1120       //
1121       BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
1122       BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
1123       BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r+1).addReg(Op1r+1);
1124       BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0, X86::BL);
1125       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
1126       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
1127       BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
1128                                                    .addReg(X86::AX);
1129       // NOTE: visitSetCondInst knows that the value is dumped into the BL
1130       // register at this point for long values...
1131       return OpNum;
1132     }
1133   }
1134   return OpNum;
1135 }
1136
1137 /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
1138 /// register, then move it to wherever the result should be.
1139 ///
1140 void X86ISel::visitSetCondInst(SetCondInst &I) {
1141   if (canFoldSetCCIntoBranchOrSelect(&I))
1142     return;  // Fold this into a branch or select.
1143
1144   unsigned DestReg = getReg(I);
1145   MachineBasicBlock::iterator MII = BB->end();
1146   emitSetCCOperation(BB, MII, I.getOperand(0), I.getOperand(1), I.getOpcode(),
1147                      DestReg);
1148 }
1149
1150 /// emitSetCCOperation - Common code shared between visitSetCondInst and
1151 /// constant expression support.
1152 ///
1153 void X86ISel::emitSetCCOperation(MachineBasicBlock *MBB,
1154                                  MachineBasicBlock::iterator IP,
1155                                  Value *Op0, Value *Op1, unsigned Opcode,
1156                                  unsigned TargetReg) {
1157   unsigned OpNum = getSetCCNumber(Opcode);
1158   OpNum = EmitComparison(OpNum, Op0, Op1, MBB, IP);
1159
1160   const Type *CompTy = Op0->getType();
1161   unsigned CompClass = getClassB(CompTy);
1162   bool isSigned = CompTy->isSigned() && CompClass != cFP;
1163
1164   if (CompClass != cLong || OpNum < 2) {
1165     // Handle normal comparisons with a setcc instruction...
1166     BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
1167   } else {
1168     // Handle long comparisons by copying the value which is already in BL into
1169     // the register we want...
1170     BuildMI(*MBB, IP, X86::MOV8rr, 1, TargetReg).addReg(X86::BL);
1171   }
1172 }
1173
1174 void X86ISel::visitSelectInst(SelectInst &SI) {
1175   unsigned DestReg = getReg(SI);
1176   MachineBasicBlock::iterator MII = BB->end();
1177   emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(),
1178                       SI.getFalseValue(), DestReg);
1179 }
1180
1181 /// emitSelectOperation - Common code shared between visitSelectInst and the
1182 /// constant expression support.
     ///
     /// Emits code at IP in MBB that selects between TrueVal and FalseVal based on
     /// Cond, leaving the result in DestReg (and DestReg+1 for long values).  The
     /// selection is done with a conditional move; if Cond is a setcc that can be
     /// folded, the comparison is emitted here and the conditional move is driven
     /// directly by its flags.
1183 void X86ISel::emitSelectOperation(MachineBasicBlock *MBB,
1184                                   MachineBasicBlock::iterator IP,
1185                                   Value *Cond, Value *TrueVal, Value *FalseVal,
1186                                   unsigned DestReg) {
1187   unsigned SelectClass = getClassB(TrueVal->getType());
1188
1189   // We don't support 8-bit conditional moves.  If we have incoming constants,
1190   // transform them into 16-bit constants to avoid having a run-time conversion.
1191   if (SelectClass == cByte) {
1192     if (Constant *T = dyn_cast<Constant>(TrueVal))
1193       TrueVal = ConstantExpr::getCast(T, Type::ShortTy);
1194     if (Constant *F = dyn_cast<Constant>(FalseVal))
1195       FalseVal = ConstantExpr::getCast(F, Type::ShortTy);
1196   }
1197
1198   unsigned TrueReg  = getReg(TrueVal, MBB, IP);
1199   unsigned FalseReg = getReg(FalseVal, MBB, IP);
     // Both arms are the same register: the condition is irrelevant, so a plain
     // move (indexed by type class; longs move both halves) is all we need.
1200   if (TrueReg == FalseReg) {
1201     static const unsigned Opcode[] = {
1202       X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
1203     };
1204     BuildMI(*MBB, IP, Opcode[SelectClass], 1, DestReg).addReg(TrueReg);
1205     if (SelectClass == cLong)
1206       BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(TrueReg+1);
1207     return;
1208   }
1209
1210   unsigned Opcode;
1211   if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) {
1212     // We successfully folded the setcc into the select instruction.
1213
1214     unsigned OpNum = getSetCCNumber(SCI->getOpcode());
1215     OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), MBB,
1216                            IP);
1217
1218     const Type *CompTy = SCI->getOperand(0)->getType();
1219     bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
1220
     // The tables below are indexed [isSigned][OpNum] and hold the cmov for the
     // INVERSE of the setcc condition -- presumably because the cmov emitted at
     // the end starts from the true value and conditionally replaces it with
     // the false value.
1221     // LLVM  -> X86 signed  X86 unsigned
1222     // -----    ----------  ------------
1223     // seteq -> cmovNE      cmovNE
1224     // setne -> cmovE       cmovE
1225     // setlt -> cmovGE      cmovAE
1226     // setge -> cmovL       cmovB
1227     // setgt -> cmovLE      cmovBE
1228     // setle -> cmovG       cmovA
1229     // ----
1230     //          cmovNS              // Used by comparison with 0 optimization
1231     //          cmovS
1232
1233     switch (SelectClass) {
1234     default: assert(0 && "Unknown value class!");
1235     case cFP: {
1236       // Annoyingly, we don't have a full set of floating point conditional
1237       // moves.  :(
     // Row 0 is used when isSigned is false (unsigned integer and floating
     // point comparisons); row 1 (signed integer comparisons) only has
     // entries for seteq/setne.
1238       static const unsigned OpcodeTab[2][8] = {
1239         { X86::FCMOVNE, X86::FCMOVE, X86::FCMOVAE, X86::FCMOVB,
1240           X86::FCMOVBE, X86::FCMOVA, 0, 0 },
1241         { X86::FCMOVNE, X86::FCMOVE, 0, 0, 0, 0, 0, 0 },
1242       };
1243       Opcode = OpcodeTab[isSigned][OpNum];
1244
1245       // If opcode == 0, we hit a case that we don't support.  Output a setcc
1246       // and compare the result against zero.
1247       if (Opcode == 0) {
1248         unsigned CompClass = getClassB(CompTy);
1249         unsigned CondReg;
1250         if (CompClass != cLong || OpNum < 2) {
1251           CondReg = makeAnotherReg(Type::BoolTy);
1252           // Handle normal comparisons with a setcc instruction...
1253           BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, CondReg);
1254         } else {
1255           // Long comparisons end up in the BL register.
1256           CondReg = X86::BL;
1257         }
1258
1259         BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
1260         Opcode = X86::FCMOVE;
1261       }
1262       break;
1263     }
1264     case cByte:
1265     case cShort: {
     // Byte selects are performed as 16-bit conditional moves (see below).
1266       static const unsigned OpcodeTab[2][8] = {
1267         { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVAE16rr, X86::CMOVB16rr,
1268           X86::CMOVBE16rr, X86::CMOVA16rr, 0, 0 },
1269         { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVGE16rr, X86::CMOVL16rr,
1270           X86::CMOVLE16rr, X86::CMOVG16rr, X86::CMOVNS16rr, X86::CMOVS16rr },
1271       };
1272       Opcode = OpcodeTab[isSigned][OpNum];
1273       break;
1274     }
1275     case cInt:
1276     case cLong: {
     // Long selects use a 32-bit conditional move for each half.
1277       static const unsigned OpcodeTab[2][8] = {
1278         { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVAE32rr, X86::CMOVB32rr,
1279           X86::CMOVBE32rr, X86::CMOVA32rr, 0, 0 },
1280         { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVGE32rr, X86::CMOVL32rr,
1281           X86::CMOVLE32rr, X86::CMOVG32rr, X86::CMOVNS32rr, X86::CMOVS32rr },
1282       };
1283       Opcode = OpcodeTab[isSigned][OpNum];
1284       break;
1285     }
1286     }
1287   } else {
1288     // Get the value being selected on, and use it to set the condition codes.
1289     unsigned CondReg = getReg(Cond, MBB, IP);
1290     BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
1291     switch (SelectClass) {
1292     default: assert(0 && "Unknown value class!");
1293     case cFP:    Opcode = X86::FCMOVE; break;
1294     case cByte:
1295     case cShort: Opcode = X86::CMOVE16rr; break;
1296     case cInt:
1297     case cLong:  Opcode = X86::CMOVE32rr; break;
1298     }
1299   }
1300
     // Remember the caller's destination; byte selects go through a temporary
     // 16-bit register and are truncated into RealDestReg at the end.
1301   unsigned RealDestReg = DestReg;
1302
1303
1304   // Annoyingly enough, X86 doesn't HAVE 8-bit conditional moves.  Because of
1305   // this, we have to promote the incoming values to 16 bits, perform a 16-bit
1306   // cmove, then truncate the result.
1307   if (SelectClass == cByte) {
1308     DestReg = makeAnotherReg(Type::ShortTy);
     // Values that were constants have already been widened to short above,
     // so only operands that are still byte-sized need promoting here.
1309     if (getClassB(TrueVal->getType()) == cByte) {
1310       // Promote the true value, by storing it into AL, and reading from AX.
1311       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::AL).addReg(TrueReg);
1312       BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::AH).addImm(0);
1313       TrueReg = makeAnotherReg(Type::ShortTy);
1314       BuildMI(*MBB, IP, X86::MOV16rr, 1, TrueReg).addReg(X86::AX);
1315     }
1316     if (getClassB(FalseVal->getType()) == cByte) {
1317       // Promote the false value, by storing it into CL, and reading from CX.
1318       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(FalseReg);
1319       BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::CH).addImm(0);
1320       FalseReg = makeAnotherReg(Type::ShortTy);
1321       BuildMI(*MBB, IP, X86::MOV16rr, 1, FalseReg).addReg(X86::CX);
1322     }
1323   }
1324
     // The conditional move itself (low half only for long values).
1325   BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(TrueReg).addReg(FalseReg);
1326
1327   switch (SelectClass) {
1328   case cByte:
1329     // We did the computation with 16-bit registers.  Truncate back to our
1330     // result by copying into AX then copying out AL.
1331     BuildMI(*MBB, IP, X86::MOV16rr, 1, X86::AX).addReg(DestReg);
1332     BuildMI(*MBB, IP, X86::MOV8rr, 1, RealDestReg).addReg(X86::AL);
1333     break;
1334   case cLong:
1335     // Move the upper half of the value as well.
1336     BuildMI(*MBB, IP, Opcode, 2,DestReg+1).addReg(TrueReg+1).addReg(FalseReg+1);
1337     break;
1338   }
1339 }
1340
1341
1342
1343 /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
1344 /// operand, in the specified target register.
1345 ///
1346 void X86ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
1347   bool isUnsigned = VR.Ty->isUnsigned() || VR.Ty == Type::BoolTy;
1348
1349   Value *Val = VR.Val;
1350   const Type *Ty = VR.Ty;
1351   if (Val) {
1352     if (Constant *C = dyn_cast<Constant>(Val)) {
1353       Val = ConstantExpr::getCast(C, Type::IntTy);
1354       Ty = Type::IntTy;
1355     }
1356
1357     // If this is a simple constant, just emit a MOVri directly to avoid the
1358     // copy.
1359     if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
1360       int TheVal = CI->getRawValue() & 0xFFFFFFFF;
1361       BuildMI(BB, X86::MOV32ri, 1, targetReg).addImm(TheVal);
1362       return;
1363     }
1364   }
1365
1366   // Make sure we have the register number for this value...
1367   unsigned Reg = Val ? getReg(Val) : VR.Reg;
1368
1369   switch (getClassB(Ty)) {
1370   case cByte:
1371     // Extend value into target register (8->32)
1372     if (isUnsigned)
1373       BuildMI(BB, X86::MOVZX32rr8, 1, targetReg).addReg(Reg);
1374     else
1375       BuildMI(BB, X86::MOVSX32rr8, 1, targetReg).addReg(Reg);
1376     break;
1377   case cShort:
1378     // Extend value into target register (16->32)
1379     if (isUnsigned)
1380       BuildMI(BB, X86::MOVZX32rr16, 1, targetReg).addReg(Reg);
1381     else
1382       BuildMI(BB, X86::MOVSX32rr16, 1, targetReg).addReg(Reg);
1383     break;
1384   case cInt:
1385     // Move value into target register (32->32)
1386     BuildMI(BB, X86::MOV32rr, 1, targetReg).addReg(Reg);
1387     break;
1388   default:
1389     assert(0 && "Unpromotable operand class in promote32");
1390   }
1391 }
1392
1393 /// 'ret' instruction - Here we are interested in meeting the x86 ABI.  As such,
1394 /// we have the following possibilities:
1395 ///
1396 ///   ret void: No return value, simply emit a 'ret' instruction
1397 ///   ret sbyte, ubyte : Extend value into EAX and return
1398 ///   ret short, ushort: Extend value into EAX and return
1399 ///   ret int, uint    : Move value into EAX and return
1400 ///   ret pointer      : Move value into EAX and return
1401 ///   ret long, ulong  : Move value into EAX/EDX and return
1402 ///   ret float/double : Top of FP stack
1403 ///
1404 void X86ISel::visitReturnInst(ReturnInst &I) {
1405   if (I.getNumOperands() == 0) {
1406     BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
1407     return;
1408   }
1409
1410   Value *RetVal = I.getOperand(0);
1411   switch (getClassB(RetVal->getType())) {
1412   case cByte:   // integral return values: extend or move into EAX and return
1413   case cShort:
1414   case cInt:
1415     promote32(X86::EAX, ValueRecord(RetVal));
1416     // Declare that EAX is live on exit
1417     BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP);
1418     break;
1419   case cFP: {                  // Floats & Doubles: Return in ST(0)
1420     unsigned RetReg = getReg(RetVal);
1421     BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg);
1422     // Declare that top-of-stack is live on exit
1423     BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP);
1424     break;
1425   }
1426   case cLong: {
1427     unsigned RetReg = getReg(RetVal);
1428     BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg);
1429     BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1);
1430     // Declare that EAX & EDX are live on exit
1431     BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX)
1432       .addReg(X86::ESP);
1433     break;
1434   }
1435   default:
1436     visitInstruction(I);
1437   }
1438   // Emit a 'ret' instruction
1439   BuildMI(BB, X86::RET, 0);
1440 }
1441
1442 // getBlockAfter - Return the basic block which occurs lexically after the
1443 // specified one.
1444 static inline BasicBlock *getBlockAfter(BasicBlock *BB) {
1445   Function::iterator I = BB; ++I;  // Get iterator to next block
1446   return I != BB->getParent()->end() ? &*I : 0;
1447 }
1448
1449 /// visitBranchInst - Handle conditional and unconditional branches here.  Note
1450 /// that since code layout is frozen at this point, that if we are trying to
1451 /// jump to a block that is the immediate successor of the current block, we can
1452 /// just make a fall-through (but we don't currently).
1453 ///
1454 void X86ISel::visitBranchInst(BranchInst &BI) {
1455   // Update machine-CFG edges
1456   BB->addSuccessor (MBBMap[BI.getSuccessor(0)]);
1457   if (BI.isConditional())
1458     BB->addSuccessor (MBBMap[BI.getSuccessor(1)]);
1459
1460   BasicBlock *NextBB = getBlockAfter(BI.getParent());  // BB after current one
1461
1462   if (!BI.isConditional()) {  // Unconditional branch?
1463     if (BI.getSuccessor(0) != NextBB)
1464       BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
1465     return;
1466   }
1467
1468   // See if we can fold the setcc into the branch itself...
1469   SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(BI.getCondition());
1470   if (SCI == 0) {
1471     // Nope, cannot fold setcc into this branch.  Emit a branch on a condition
1472     // computed some other way...
1473     unsigned condReg = getReg(BI.getCondition());
1474     BuildMI(BB, X86::TEST8rr, 2).addReg(condReg).addReg(condReg);
1475     if (BI.getSuccessor(1) == NextBB) {
1476       if (BI.getSuccessor(0) != NextBB)
1477         BuildMI(BB, X86::JNE, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
1478     } else {
1479       BuildMI(BB, X86::JE, 1).addMBB(MBBMap[BI.getSuccessor(1)]);
1480
1481       if (BI.getSuccessor(0) != NextBB)
1482         BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
1483     }
1484     return;
1485   }
1486
1487   unsigned OpNum = getSetCCNumber(SCI->getOpcode());
1488   MachineBasicBlock::iterator MII = BB->end();
1489   OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), BB,MII);
1490
1491   const Type *CompTy = SCI->getOperand(0)->getType();
1492   bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
1493
1494
1495   // LLVM  -> X86 signed  X86 unsigned
1496   // -----    ----------  ------------
1497   // seteq -> je          je
1498   // setne -> jne         jne
1499   // setlt -> jl          jb
1500   // setge -> jge         jae
1501   // setgt -> jg          ja
1502   // setle -> jle         jbe
1503   // ----
1504   //          js                  // Used by comparison with 0 optimization
1505   //          jns
1506
1507   static const unsigned OpcodeTab[2][8] = {
1508     { X86::JE, X86::JNE, X86::JB, X86::JAE, X86::JA, X86::JBE, 0, 0 },
1509     { X86::JE, X86::JNE, X86::JL, X86::JGE, X86::JG, X86::JLE,
1510       X86::JS, X86::JNS },
1511   };
1512
1513   if (BI.getSuccessor(0) != NextBB) {
1514     BuildMI(BB, OpcodeTab[isSigned][OpNum], 1)
1515       .addMBB(MBBMap[BI.getSuccessor(0)]);
1516     if (BI.getSuccessor(1) != NextBB)
1517       BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(1)]);
1518   } else {
1519     // Change to the inverse condition...
1520     if (BI.getSuccessor(1) != NextBB) {
1521       OpNum ^= 1;
1522       BuildMI(BB, OpcodeTab[isSigned][OpNum], 1)
1523         .addMBB(MBBMap[BI.getSuccessor(1)]);
1524     }
1525   }
1526 }
1527
1528
1529 /// doCall - This emits an abstract call instruction, setting up the arguments
1530 /// and the return value as appropriate.  For the actual function call itself,
1531 /// it inserts the specified CallMI instruction into the stream.
1532 ///
1533 void X86ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
1534                      const std::vector<ValueRecord> &Args) {
1535   // Count how many bytes are to be pushed on the stack...
1536   unsigned NumBytes = 0;
1537
1538   if (!Args.empty()) {
1539     for (unsigned i = 0, e = Args.size(); i != e; ++i)
1540       switch (getClassB(Args[i].Ty)) {
1541       case cByte: case cShort: case cInt:
1542         NumBytes += 4; break;
1543       case cLong:
1544         NumBytes += 8; break;
1545       case cFP:
1546         NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8;
1547         break;
1548       default: assert(0 && "Unknown class!");
1549       }
1550
1551     // Adjust the stack pointer for the new arguments...
1552     BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(NumBytes);
1553
1554     // Arguments go on the stack in reverse order, as specified by the ABI.
1555     unsigned ArgOffset = 0;
1556     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1557       unsigned ArgReg;
1558       switch (getClassB(Args[i].Ty)) {
1559       case cByte:
1560         if (Args[i].Val && isa<ConstantBool>(Args[i].Val)) {
1561           addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
1562             .addImm(Args[i].Val == ConstantBool::True);
1563           break;
1564         }
1565         // FALL THROUGH
1566       case cShort:
1567         if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
1568           // Zero/Sign extend constant, then stuff into memory.
1569           ConstantInt *Val = cast<ConstantInt>(Args[i].Val);
1570           Val = cast<ConstantInt>(ConstantExpr::getCast(Val, Type::IntTy));
1571           addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
1572             .addImm(Val->getRawValue() & 0xFFFFFFFF);
1573         } else {
1574           // Promote arg to 32 bits wide into a temporary register...
1575           ArgReg = makeAnotherReg(Type::UIntTy);
1576           promote32(ArgReg, Args[i]);
1577           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1578                        X86::ESP, ArgOffset).addReg(ArgReg);
1579         }
1580         break;
1581       case cInt:
1582         if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
1583           unsigned Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
1584           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1585                        X86::ESP, ArgOffset).addImm(Val);
1586         } else if (Args[i].Val && isa<ConstantPointerNull>(Args[i].Val)) {
1587           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1588                        X86::ESP, ArgOffset).addImm(0);
1589         } else {
1590           ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
1591           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1592                        X86::ESP, ArgOffset).addReg(ArgReg);
1593         }
1594         break;
1595       case cLong:
1596         if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
1597           uint64_t Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
1598           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1599                        X86::ESP, ArgOffset).addImm(Val & ~0U);
1600           addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
1601                        X86::ESP, ArgOffset+4).addImm(Val >> 32ULL);
1602         } else {
1603           ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
1604           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1605                        X86::ESP, ArgOffset).addReg(ArgReg);
1606           addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
1607                        X86::ESP, ArgOffset+4).addReg(ArgReg+1);
1608         }
1609         ArgOffset += 4;        // 8 byte entry, not 4.
1610         break;
1611
1612       case cFP:
1613         ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
1614         if (Args[i].Ty == Type::FloatTy) {
1615           addRegOffset(BuildMI(BB, X86::FST32m, 5),
1616                        X86::ESP, ArgOffset).addReg(ArgReg);
1617         } else {
1618           assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
1619           addRegOffset(BuildMI(BB, X86::FST64m, 5),
1620                        X86::ESP, ArgOffset).addReg(ArgReg);
1621           ArgOffset += 4;       // 8 byte entry, not 4.
1622         }
1623         break;
1624
1625       default: assert(0 && "Unknown class!");
1626       }
1627       ArgOffset += 4;
1628     }
1629   } else {
1630     BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(0);
1631   }
1632
1633   BB->push_back(CallMI);
1634
1635   BuildMI(BB, X86::ADJCALLSTACKUP, 1).addImm(NumBytes);
1636
1637   // If there is a return value, scavenge the result from the location the call
1638   // leaves it in...
1639   //
1640   if (Ret.Ty != Type::VoidTy) {
1641     unsigned DestClass = getClassB(Ret.Ty);
1642     switch (DestClass) {
1643     case cByte:
1644     case cShort:
1645     case cInt: {
1646       // Integral results are in %eax, or the appropriate portion
1647       // thereof.
1648       static const unsigned regRegMove[] = {
1649         X86::MOV8rr, X86::MOV16rr, X86::MOV32rr
1650       };
1651       static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
1652       BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]);
1653       break;
1654     }
1655     case cFP:     // Floating-point return values live in %ST(0)
1656       BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg);
1657       break;
1658     case cLong:   // Long values are left in EDX:EAX
1659       BuildMI(BB, X86::MOV32rr, 1, Ret.Reg).addReg(X86::EAX);
1660       BuildMI(BB, X86::MOV32rr, 1, Ret.Reg+1).addReg(X86::EDX);
1661       break;
1662     default: assert(0 && "Unknown class!");
1663     }
1664   }
1665 }
1666
1667
1668 /// visitCallInst - Push args on stack and do a procedure call instruction.
1669 void X86ISel::visitCallInst(CallInst &CI) {
1670   MachineInstr *TheCall;
1671   if (Function *F = CI.getCalledFunction()) {
1672     // Is it an intrinsic function call?
1673     if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) {
1674       visitIntrinsicCall(ID, CI);   // Special intrinsics are not handled here
1675       return;
1676     }
1677
1678     // Emit a CALL instruction with PC-relative displacement.
1679     TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true);
1680   } else {  // Emit an indirect call...
1681     unsigned Reg = getReg(CI.getCalledValue());
1682     TheCall = BuildMI(X86::CALL32r, 1).addReg(Reg);
1683   }
1684
1685   std::vector<ValueRecord> Args;
1686   for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
1687     Args.push_back(ValueRecord(CI.getOperand(i)));
1688
1689   unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0;
1690   doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args);
1691 }
1692
1693 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
1694 /// function, lowering any calls to unknown intrinsic functions into the
1695 /// equivalent LLVM code.
1696 ///
1697 void X86ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
1698   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
1699     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
1700       if (CallInst *CI = dyn_cast<CallInst>(I++))
1701         if (Function *F = CI->getCalledFunction())
1702           switch (F->getIntrinsicID()) {
1703           case Intrinsic::not_intrinsic:
1704           case Intrinsic::vastart:
1705           case Intrinsic::vacopy:
1706           case Intrinsic::vaend:
1707           case Intrinsic::returnaddress:
1708           case Intrinsic::frameaddress:
1709           case Intrinsic::memcpy:
1710           case Intrinsic::memset:
1711           case Intrinsic::isunordered:
1712           case Intrinsic::readport:
1713           case Intrinsic::writeport:
1714             // We directly implement these intrinsics
1715             break;
1716           case Intrinsic::readio: {
1717             // On X86, memory operations are in-order.  Lower this intrinsic
1718             // into a volatile load.
1719             Instruction *Before = CI->getPrev();
1720             LoadInst * LI = new LoadInst(CI->getOperand(1), "", true, CI);
1721             CI->replaceAllUsesWith(LI);
1722             BB->getInstList().erase(CI);
1723             break;
1724           }
1725           case Intrinsic::writeio: {
1726             // On X86, memory operations are in-order.  Lower this intrinsic
1727             // into a volatile store.
1728             Instruction *Before = CI->getPrev();
1729             StoreInst *LI = new StoreInst(CI->getOperand(1),
1730                                           CI->getOperand(2), true, CI);
1731             CI->replaceAllUsesWith(LI);
1732             BB->getInstList().erase(CI);
1733             break;
1734           }
1735           default:
1736             // All other intrinsic calls we must lower.
1737             Instruction *Before = CI->getPrev();
1738             TM.getIntrinsicLowering().LowerIntrinsicCall(CI);
1739             if (Before) {        // Move iterator to instruction after call
1740               I = Before; ++I;
1741             } else {
1742               I = BB->begin();
1743             }
1744           }
1745 }
1746
1747 void X86ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
1748   unsigned TmpReg1, TmpReg2;
1749   switch (ID) {
1750   case Intrinsic::vastart:
1751     // Get the address of the first vararg value...
1752     TmpReg1 = getReg(CI);
1753     addFrameReference(BuildMI(BB, X86::LEA32r, 5, TmpReg1), VarArgsFrameIndex);
1754     return;
1755
1756   case Intrinsic::vacopy:
1757     TmpReg1 = getReg(CI);
1758     TmpReg2 = getReg(CI.getOperand(1));
1759     BuildMI(BB, X86::MOV32rr, 1, TmpReg1).addReg(TmpReg2);
1760     return;
1761   case Intrinsic::vaend: return;   // Noop on X86
1762
1763   case Intrinsic::returnaddress:
1764   case Intrinsic::frameaddress:
1765     TmpReg1 = getReg(CI);
1766     if (cast<Constant>(CI.getOperand(1))->isNullValue()) {
1767       if (ReturnAddressIndex == 0) {
1768         // Set up a frame object for the return address.
1769         ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);
1770       }
1771
1772       if (ID == Intrinsic::returnaddress) {
1773         // Just load the return address
1774         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, TmpReg1),
1775                           ReturnAddressIndex);
1776       } else {
1777         addFrameReference(BuildMI(BB, X86::LEA32r, 4, TmpReg1),
1778                           ReturnAddressIndex, -4);
1779       }
1780     } else {
1781       // Values other than zero are not implemented yet.
1782       BuildMI(BB, X86::MOV32ri, 1, TmpReg1).addImm(0);
1783     }
1784     return;
1785
1786   case Intrinsic::isunordered:
1787     TmpReg1 = getReg(CI.getOperand(1));
1788     TmpReg2 = getReg(CI.getOperand(2));
1789     emitUCOMr(BB, BB->end(), TmpReg2, TmpReg1);
1790     TmpReg2 = getReg(CI);
1791     BuildMI(BB, X86::SETPr, 0, TmpReg2);
1792     return;
1793
1794   case Intrinsic::memcpy: {
1795     assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
1796     unsigned Align = 1;
1797     if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
1798       Align = AlignC->getRawValue();
1799       if (Align == 0) Align = 1;
1800     }
1801
1802     // Turn the byte code into # iterations
1803     unsigned CountReg;
1804     unsigned Opcode;
1805     switch (Align & 3) {
1806     case 2:   // WORD aligned
1807       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1808         CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
1809       } else {
1810         CountReg = makeAnotherReg(Type::IntTy);
1811         unsigned ByteReg = getReg(CI.getOperand(3));
1812         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
1813       }
1814       Opcode = X86::REP_MOVSW;
1815       break;
1816     case 0:   // DWORD aligned
1817       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1818         CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
1819       } else {
1820         CountReg = makeAnotherReg(Type::IntTy);
1821         unsigned ByteReg = getReg(CI.getOperand(3));
1822         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
1823       }
1824       Opcode = X86::REP_MOVSD;
1825       break;
1826     default:  // BYTE aligned
1827       CountReg = getReg(CI.getOperand(3));
1828       Opcode = X86::REP_MOVSB;
1829       break;
1830     }
1831
1832     // No matter what the alignment is, we put the source in ESI, the
1833     // destination in EDI, and the count in ECX.
1834     TmpReg1 = getReg(CI.getOperand(1));
1835     TmpReg2 = getReg(CI.getOperand(2));
1836     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
1837     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
1838     BuildMI(BB, X86::MOV32rr, 1, X86::ESI).addReg(TmpReg2);
1839     BuildMI(BB, Opcode, 0);
1840     return;
1841   }
1842   case Intrinsic::memset: {
1843     assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!");
1844     unsigned Align = 1;
1845     if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
1846       Align = AlignC->getRawValue();
1847       if (Align == 0) Align = 1;
1848     }
1849
1850     // Turn the byte code into # iterations
1851     unsigned CountReg;
1852     unsigned Opcode;
1853     if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) {
1854       unsigned Val = ValC->getRawValue() & 255;
1855
1856       // If the value is a constant, then we can potentially use larger copies.
1857       switch (Align & 3) {
1858       case 2:   // WORD aligned
1859         if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1860           CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
1861         } else {
1862           CountReg = makeAnotherReg(Type::IntTy);
1863           unsigned ByteReg = getReg(CI.getOperand(3));
1864           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
1865         }
1866         BuildMI(BB, X86::MOV16ri, 1, X86::AX).addImm((Val << 8) | Val);
1867         Opcode = X86::REP_STOSW;
1868         break;
1869       case 0:   // DWORD aligned
1870         if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
1871           CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
1872         } else {
1873           CountReg = makeAnotherReg(Type::IntTy);
1874           unsigned ByteReg = getReg(CI.getOperand(3));
1875           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
1876         }
1877         Val = (Val << 8) | Val;
1878         BuildMI(BB, X86::MOV32ri, 1, X86::EAX).addImm((Val << 16) | Val);
1879         Opcode = X86::REP_STOSD;
1880         break;
1881       default:  // BYTE aligned
1882         CountReg = getReg(CI.getOperand(3));
1883         BuildMI(BB, X86::MOV8ri, 1, X86::AL).addImm(Val);
1884         Opcode = X86::REP_STOSB;
1885         break;
1886       }
1887     } else {
1888       // If it's not a constant value we are storing, just fall back.  We could
1889       // try to be clever to form 16 bit and 32 bit values, but we don't yet.
1890       unsigned ValReg = getReg(CI.getOperand(2));
1891       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
1892       CountReg = getReg(CI.getOperand(3));
1893       Opcode = X86::REP_STOSB;
1894     }
1895
1896     // No matter what the alignment is, we put the source in ESI, the
1897     // destination in EDI, and the count in ECX.
1898     TmpReg1 = getReg(CI.getOperand(1));
1899     //TmpReg2 = getReg(CI.getOperand(2));
1900     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
1901     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
1902     BuildMI(BB, Opcode, 0);
1903     return;
1904   }
1905
1906   case Intrinsic::readport: {
1907     // First, determine that the size of the operand falls within the acceptable
1908     // range for this architecture.
1909     //
1910     if (getClassB(CI.getOperand(1)->getType()) != cShort) {
1911       std::cerr << "llvm.readport: Address size is not 16 bits\n";
1912       exit(1);
1913     }
1914
1915     // Now, move the I/O port address into the DX register and use the IN
1916     // instruction to get the input data.
1917     //
1918     unsigned Class = getClass(CI.getCalledFunction()->getReturnType());
1919     unsigned DestReg = getReg(CI);
1920
1921     // If the port is a single-byte constant, use the immediate form.
1922     if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(1)))
1923       if ((C->getRawValue() & 255) == C->getRawValue()) {
1924         switch (Class) {
1925         case cByte:
1926           BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue());
1927           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
1928           return;
1929         case cShort:
1930           BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue());
1931           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX);
1932           return;
1933         case cInt:
1934           BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue());
1935           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX);
1936           return;
1937         }
1938       }
1939
1940     unsigned Reg = getReg(CI.getOperand(1));
1941     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
1942     switch (Class) {
1943     case cByte:
1944       BuildMI(BB, X86::IN8rr, 0);
1945       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
1946       break;
1947     case cShort:
1948       BuildMI(BB, X86::IN16rr, 0);
1949       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX);
1950       break;
1951     case cInt:
1952       BuildMI(BB, X86::IN32rr, 0);
1953       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX);
1954       break;
1955     default:
1956       std::cerr << "Cannot do input on this data type";
1957       exit (1);
1958     }
1959     return;
1960   }
1961
1962   case Intrinsic::writeport: {
1963     // First, determine that the size of the operand falls within the
1964     // acceptable range for this architecture.
1965     if (getClass(CI.getOperand(2)->getType()) != cShort) {
1966       std::cerr << "llvm.writeport: Address size is not 16 bits\n";
1967       exit(1);
1968     }
1969
1970     unsigned Class = getClassB(CI.getOperand(1)->getType());
1971     unsigned ValReg = getReg(CI.getOperand(1));
1972     switch (Class) {
1973     case cByte:
1974       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
1975       break;
1976     case cShort:
1977       BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg);
1978       break;
1979     case cInt:
1980       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg);
1981       break;
1982     default:
1983       std::cerr << "llvm.writeport: invalid data type for X86 target";
1984       exit(1);
1985     }
1986
1987
1988     // If the port is a single-byte constant, use the immediate form.
1989     if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(2)))
1990       if ((C->getRawValue() & 255) == C->getRawValue()) {
1991         static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir };
1992         BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue());
1993         return;
1994       }
1995
1996     // Otherwise, move the I/O port address into the DX register and the value
1997     // to write into the AL/AX/EAX register.
1998     static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr };
1999     unsigned Reg = getReg(CI.getOperand(2));
2000     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
2001     BuildMI(BB, Opc[Class], 0);
2002     return;
2003   }
2004
2005   default: assert(0 && "Error: unknown intrinsics should have been lowered!");
2006   }
2007 }
2008
2009 static bool isSafeToFoldLoadIntoInstruction(LoadInst &LI, Instruction &User) {
2010   if (LI.getParent() != User.getParent())
2011     return false;
2012   BasicBlock::iterator It = &LI;
2013   // Check all of the instructions between the load and the user.  We should
2014   // really use alias analysis here, but for now we just do something simple.
2015   for (++It; It != BasicBlock::iterator(&User); ++It) {
2016     switch (It->getOpcode()) {
2017     case Instruction::Free:
2018     case Instruction::Store:
2019     case Instruction::Call:
2020     case Instruction::Invoke:
2021       return false;
2022     case Instruction::Load:
2023       if (cast<LoadInst>(It)->isVolatile() && LI.isVolatile())
2024         return false;
2025       break;
2026     }
2027   }
2028   return true;
2029 }
2030
2031 /// visitSimpleBinary - Implement simple binary operators for integral types...
2032 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
2033 /// Xor.
2034 ///
2035 void X86ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
2036   unsigned DestReg = getReg(B);
2037   MachineBasicBlock::iterator MI = BB->end();
2038   Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
2039   unsigned Class = getClassB(B.getType());
2040
2041   // If this is AND X, C, and it is only used by a setcc instruction, it will
2042   // be folded.  There is no need to emit this instruction.
2043   if (B.hasOneUse() && OperatorClass == 2 && isa<ConstantInt>(Op1))
2044     if (Class == cByte || Class == cShort || Class == cInt) {
2045       Instruction *Use = cast<Instruction>(B.use_back());
2046       if (isa<SetCondInst>(Use) &&
2047           Use->getOperand(1) == Constant::getNullValue(B.getType())) {
2048         switch (getSetCCNumber(Use->getOpcode())) {
2049         case 0:
2050         case 1:
2051           return;
2052         default:
2053           if (B.getType()->isSigned()) return;
2054         }
2055       }
2056     }
2057
2058   // Special case: op Reg, load [mem]
2059   if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1) && Class != cLong &&
2060       Op0->hasOneUse() &&
2061       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B))
2062     if (!B.swapOperands())
2063       std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
2064
2065   if (isa<LoadInst>(Op1) && Class != cLong && Op1->hasOneUse() &&
2066       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) {
2067
2068     unsigned Opcode;
2069     if (Class != cFP) {
2070       static const unsigned OpcodeTab[][3] = {
2071         // Arithmetic operators
2072         { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm },  // ADD
2073         { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm },  // SUB
2074
2075         // Bitwise operators
2076         { X86::AND8rm, X86::AND16rm, X86::AND32rm },  // AND
2077         { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm },  // OR
2078         { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm },  // XOR
2079       };
2080       Opcode = OpcodeTab[OperatorClass][Class];
2081     } else {
2082       static const unsigned OpcodeTab[][2] = {
2083         { X86::FADD32m, X86::FADD64m },  // ADD
2084         { X86::FSUB32m, X86::FSUB64m },  // SUB
2085       };
2086       const Type *Ty = Op0->getType();
2087       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
2088       Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy];
2089     }
2090
2091     unsigned Op0r = getReg(Op0);
2092     if (AllocaInst *AI =
2093         dyn_castFixedAlloca(cast<LoadInst>(Op1)->getOperand(0))) {
2094       unsigned FI = getFixedSizedAllocaFI(AI);
2095       addFrameReference(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), FI);
2096
2097     } else {
2098       X86AddressMode AM;
2099       getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), AM);
2100
2101       addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), AM);
2102     }
2103     return;
2104   }
2105
2106   // If this is a floating point subtract, check to see if we can fold the first
2107   // operand in.
2108   if (Class == cFP && OperatorClass == 1 &&
2109       isa<LoadInst>(Op0) &&
2110       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) {
2111     const Type *Ty = Op0->getType();
2112     assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
2113     unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m;
2114
2115     unsigned Op1r = getReg(Op1);
2116     if (AllocaInst *AI =
2117         dyn_castFixedAlloca(cast<LoadInst>(Op0)->getOperand(0))) {
2118       unsigned FI = getFixedSizedAllocaFI(AI);
2119       addFrameReference(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), FI);
2120     } else {
2121       X86AddressMode AM;
2122       getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), AM);
2123
2124       addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), AM);
2125     }
2126     return;
2127   }
2128
2129   emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg);
2130 }
2131
2132
2133 /// emitBinaryFPOperation - This method handles emission of floating point
2134 /// Add (0), Sub (1), Mul (2), and Div (3) operations.
2135 void X86ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
2136                                     MachineBasicBlock::iterator IP,
2137                                     Value *Op0, Value *Op1,
2138                                     unsigned OperatorClass, unsigned DestReg) {
2139   // Special case: op Reg, <const fp>
2140   if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
2141     if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
2142       // Create a constant pool entry for this constant.
2143       MachineConstantPool *CP = F->getConstantPool();
2144       unsigned CPI = CP->getConstantPoolIndex(Op1C);
2145       const Type *Ty = Op1->getType();
2146
2147       static const unsigned OpcodeTab[][4] = {
2148         { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m },   // Float
2149         { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m },   // Double
2150       };
2151
2152       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
2153       unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
2154       unsigned Op0r = getReg(Op0, BB, IP);
2155       addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
2156                                        DestReg).addReg(Op0r), CPI);
2157       return;
2158     }
2159
2160   // Special case: R1 = op <const fp>, R2
2161   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
2162     if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) {
2163       // -0.0 - X === -X
2164       unsigned op1Reg = getReg(Op1, BB, IP);
2165       BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
2166       return;
2167     } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
2168       // R1 = op CST, R2  -->  R1 = opr R2, CST
2169
2170       // Create a constant pool entry for this constant.
2171       MachineConstantPool *CP = F->getConstantPool();
2172       unsigned CPI = CP->getConstantPoolIndex(CFP);
2173       const Type *Ty = CFP->getType();
2174
2175       static const unsigned OpcodeTab[][4] = {
2176         { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float
2177         { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double
2178       };
2179
2180       assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2181       unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
2182       unsigned Op1r = getReg(Op1, BB, IP);
2183       addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
2184                                        DestReg).addReg(Op1r), CPI);
2185       return;
2186     }
2187
2188   // General case.
2189   static const unsigned OpcodeTab[4] = {
2190     X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV
2191   };
2192
2193   unsigned Opcode = OpcodeTab[OperatorClass];
2194   unsigned Op0r = getReg(Op0, BB, IP);
2195   unsigned Op1r = getReg(Op1, BB, IP);
2196   BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
2197 }
2198
2199 /// emitSimpleBinaryOperation - Implement simple binary operators for integral
2200 /// types...  OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for
2201 /// Or, 4 for Xor.
2202 ///
2203 /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
2204 /// and constant expression support.
2205 ///
2206 void X86ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
2207                                         MachineBasicBlock::iterator IP,
2208                                         Value *Op0, Value *Op1,
2209                                         unsigned OperatorClass,
2210                                         unsigned DestReg) {
2211   unsigned Class = getClassB(Op0->getType());
2212
2213   if (Class == cFP) {
2214     assert(OperatorClass < 2 && "No logical ops for FP!");
2215     emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg);
2216     return;
2217   }
2218
2219   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0))
2220     if (OperatorClass == 1) {
2221       static unsigned const NEGTab[] = {
2222         X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r
2223       };
2224
2225       // sub 0, X -> neg X
2226       if (CI->isNullValue()) {
2227         unsigned op1Reg = getReg(Op1, MBB, IP);
2228         BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg);
2229
2230         if (Class == cLong) {
2231           // We just emitted: Dl = neg Sl
2232           // Now emit       : T  = addc Sh, 0
2233           //                : Dh = neg T
2234           unsigned T = makeAnotherReg(Type::IntTy);
2235           BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0);
2236           BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T);
2237         }
2238         return;
2239       } else if (Op1->hasOneUse() && Class != cLong) {
2240         // sub C, X -> tmp = neg X; DestReg = add tmp, C.  This is better
2241         // than copying C into a temporary register, because of register
2242         // pressure (tmp and destreg can share a register.
2243         static unsigned const ADDRITab[] = {
2244           X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri
2245         };
2246         unsigned op1Reg = getReg(Op1, MBB, IP);
2247         unsigned Tmp = makeAnotherReg(Op0->getType());
2248         BuildMI(*MBB, IP, NEGTab[Class], 1, Tmp).addReg(op1Reg);
2249         BuildMI(*MBB, IP, ADDRITab[Class], 2,
2250                 DestReg).addReg(Tmp).addImm(CI->getRawValue());
2251         return;
2252       }
2253     }
2254
2255   // Special case: op Reg, <const int>
2256   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
2257     unsigned Op0r = getReg(Op0, MBB, IP);
2258
2259     // xor X, -1 -> not X
2260     if (OperatorClass == 4 && Op1C->isAllOnesValue()) {
2261       static unsigned const NOTTab[] = {
2262         X86::NOT8r, X86::NOT16r, X86::NOT32r, 0, X86::NOT32r
2263       };
2264       BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
2265       if (Class == cLong)  // Invert the top part too
2266         BuildMI(*MBB, IP, X86::NOT32r, 1, DestReg+1).addReg(Op0r+1);
2267       return;
2268     }
2269
2270     // add X, -1 -> dec X
2271     if (OperatorClass == 0 && Op1C->isAllOnesValue() && Class != cLong) {
2272       // Note that we can't use dec for 64-bit decrements, because it does not
2273       // set the carry flag!
2274       static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
2275       BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
2276       return;
2277     }
2278
2279     // add X, 1 -> inc X
2280     if (OperatorClass == 0 && Op1C->equalsInt(1) && Class != cLong) {
2281       // Note that we can't use inc for 64-bit increments, because it does not
2282       // set the carry flag!
2283       static unsigned const INCTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
2284       BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r);
2285       return;
2286     }
2287
2288     static const unsigned OpcodeTab[][5] = {
2289       // Arithmetic operators
2290       { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri },  // ADD
2291       { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, X86::SUB32ri },  // SUB
2292
2293       // Bitwise operators
2294       { X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, X86::AND32ri },  // AND
2295       { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri, 0, X86::OR32ri  },  // OR
2296       { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, X86::XOR32ri },  // XOR
2297     };
2298
2299     unsigned Opcode = OpcodeTab[OperatorClass][Class];
2300     unsigned Op1l = cast<ConstantInt>(Op1C)->getRawValue();
2301
2302     if (Class != cLong) {
2303       BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
2304       return;
2305     }
2306
2307     // If this is a long value and the high or low bits have a special
2308     // property, emit some special cases.
2309     unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
2310
2311     // If the constant is zero in the low 32-bits, just copy the low part
2312     // across and apply the normal 32-bit operation to the high parts.  There
2313     // will be no carry or borrow into the top.
2314     if (Op1l == 0) {
2315       if (OperatorClass != 2) // All but and...
2316         BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r);
2317       else
2318         BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
2319       BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1)
2320         .addReg(Op0r+1).addImm(Op1h);
2321       return;
2322     }
2323
2324     // If this is a logical operation and the top 32-bits are zero, just
2325     // operate on the lower 32.
2326     if (Op1h == 0 && OperatorClass > 1) {
2327       BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg)
2328         .addReg(Op0r).addImm(Op1l);
2329       if (OperatorClass != 2)  // All but and
2330         BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1);
2331       else
2332         BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
2333       return;
2334     }
2335
2336     // TODO: We could handle lots of other special cases here, such as AND'ing
2337     // with 0xFFFFFFFF00000000 -> noop, etc.
2338
2339     // Otherwise, code generate the full operation with a constant.
2340     static const unsigned TopTab[] = {
2341       X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
2342     };
2343
2344     BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
2345     BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
2346       .addReg(Op0r+1).addImm(Op1h);
2347     return;
2348   }
2349
2350   // Finally, handle the general case now.
2351   static const unsigned OpcodeTab[][5] = {
2352     // Arithmetic operators
2353     { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr },  // ADD
2354     { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr },  // SUB
2355
2356     // Bitwise operators
2357     { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr },  // AND
2358     { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0, X86:: OR32rr },  // OR
2359     { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, X86::XOR32rr },  // XOR
2360   };
2361
2362   unsigned Opcode = OpcodeTab[OperatorClass][Class];
2363   unsigned Op0r = getReg(Op0, MBB, IP);
2364   unsigned Op1r = getReg(Op1, MBB, IP);
2365   BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
2366
2367   if (Class == cLong) {        // Handle the upper 32 bits of long values...
2368     static const unsigned TopTab[] = {
2369       X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
2370     };
2371     BuildMI(*MBB, IP, TopTab[OperatorClass], 2,
2372             DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
2373   }
2374 }
2375
2376 /// doMultiply - Emit appropriate instructions to multiply together the
2377 /// registers op0Reg and op1Reg, and put the result in DestReg.  The type of the
2378 /// result should be given as DestTy.
2379 ///
2380 void X86ISel::doMultiply(MachineBasicBlock *MBB,
2381                          MachineBasicBlock::iterator MBBI,
2382                          unsigned DestReg, const Type *DestTy,
2383                          unsigned op0Reg, unsigned op1Reg) {
2384   unsigned Class = getClass(DestTy);
2385   switch (Class) {
2386   case cInt:
2387   case cShort:
2388     BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg)
2389       .addReg(op0Reg).addReg(op1Reg);
2390     return;
2391   case cByte:
2392     // Must use the MUL instruction, which forces use of AL...
2393     BuildMI(*MBB, MBBI, X86::MOV8rr, 1, X86::AL).addReg(op0Reg);
2394     BuildMI(*MBB, MBBI, X86::MUL8r, 1).addReg(op1Reg);
2395     BuildMI(*MBB, MBBI, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
2396     return;
2397   default:
2398   case cLong: assert(0 && "doMultiply cannot operate on LONG values!");
2399   }
2400 }
2401
// ExactLog2 - Solve (Val == 1 << (N-1)) for N and return it, i.e. return one
// more than the base-2 logarithm of Val.  Zero is returned when the input is
// not exactly a power of two (which includes an input of zero).
static unsigned ExactLog2(unsigned Val) {
  // Reject zero and anything with more than one bit set.
  if (Val == 0 || (Val & (Val-1)) != 0)
    return 0;

  // Start at one (the answer for Val == 1) and add one for every position the
  // single set bit sits above bit zero.
  unsigned Result = 1;
  for (; Val != 1; Val >>= 1)
    ++Result;
  return Result;
}
2413
2414
2415 /// doMultiplyConst - This function is specialized to efficiently codegen an 8,
2416 /// 16, or 32-bit integer multiply by a constant.
2417 void X86ISel::doMultiplyConst(MachineBasicBlock *MBB,
2418                               MachineBasicBlock::iterator IP,
2419                               unsigned DestReg, const Type *DestTy,
2420                               unsigned op0Reg, unsigned ConstRHS) {
2421   static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
2422   static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
2423   static const unsigned ADDrrTab[] = {X86::ADD8rr, X86::ADD16rr, X86::ADD32rr};
2424   static const unsigned NEGrTab[]  = {X86::NEG8r , X86::NEG16r , X86::NEG32r };
2425
2426   unsigned Class = getClass(DestTy);
2427   unsigned TmpReg;
2428
2429   // Handle special cases here.
2430   switch (ConstRHS) {
2431   case -2:
2432     TmpReg = makeAnotherReg(DestTy);
2433     BuildMI(*MBB, IP, NEGrTab[Class], 1, TmpReg).addReg(op0Reg);
2434     BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(TmpReg).addReg(TmpReg);
2435     return;
2436   case -1:
2437     BuildMI(*MBB, IP, NEGrTab[Class], 1, DestReg).addReg(op0Reg);
2438     return;
2439   case 0:
2440     BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
2441     return;
2442   case 1:
2443     BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
2444     return;
2445   case 2:
2446     BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(op0Reg).addReg(op0Reg);
2447     return;
2448   case 3:
2449   case 5:
2450   case 9:
2451     if (Class == cInt) {
2452       X86AddressMode AM;
2453       AM.BaseType = X86AddressMode::RegBase;
2454       AM.Base.Reg = op0Reg;
2455       AM.Scale = ConstRHS-1;
2456       AM.IndexReg = op0Reg;
2457       AM.Disp = 0;
2458       addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, DestReg), AM);
2459       return;
2460     }
2461   case -3:
2462   case -5:
2463   case -9:
2464     if (Class == cInt) {
2465       TmpReg = makeAnotherReg(DestTy);
2466       X86AddressMode AM;
2467       AM.BaseType = X86AddressMode::RegBase;
2468       AM.Base.Reg = op0Reg;
2469       AM.Scale = -ConstRHS-1;
2470       AM.IndexReg = op0Reg;
2471       AM.Disp = 0;
2472       addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TmpReg), AM);
2473       BuildMI(*MBB, IP, NEGrTab[Class], 1, DestReg).addReg(TmpReg);
2474       return;
2475     }
2476   }
2477
2478   // If the element size is exactly a power of 2, use a shift to get it.
2479   if (unsigned Shift = ExactLog2(ConstRHS)) {
2480     switch (Class) {
2481     default: assert(0 && "Unknown class for this function!");
2482     case cByte:
2483       BuildMI(*MBB, IP, X86::SHL8ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
2484       return;
2485     case cShort:
2486       BuildMI(*MBB, IP, X86::SHL16ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
2487       return;
2488     case cInt:
2489       BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
2490       return;
2491     }
2492   }
2493
2494   // If the element size is a negative power of 2, use a shift/neg to get it.
2495   if (unsigned Shift = ExactLog2(-ConstRHS)) {
2496     TmpReg = makeAnotherReg(DestTy);
2497     BuildMI(*MBB, IP, NEGrTab[Class], 1, TmpReg).addReg(op0Reg);
2498     switch (Class) {
2499     default: assert(0 && "Unknown class for this function!");
2500     case cByte:
2501       BuildMI(*MBB, IP, X86::SHL8ri,2, DestReg).addReg(TmpReg).addImm(Shift-1);
2502       return;
2503     case cShort:
2504       BuildMI(*MBB, IP, X86::SHL16ri,2, DestReg).addReg(TmpReg).addImm(Shift-1);
2505       return;
2506     case cInt:
2507       BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(TmpReg).addImm(Shift-1);
2508       return;
2509     }
2510   }
2511
2512   if (Class == cShort) {
2513     BuildMI(*MBB, IP, X86::IMUL16rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
2514     return;
2515   } else if (Class == cInt) {
2516     BuildMI(*MBB, IP, X86::IMUL32rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
2517     return;
2518   }
2519
2520   // Most general case, emit a normal multiply...
2521   TmpReg = makeAnotherReg(DestTy);
2522   BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
2523
2524   // Emit a MUL to multiply the register holding the index by
2525   // elementSize, putting the result in OffsetReg.
2526   doMultiply(MBB, IP, DestReg, DestTy, op0Reg, TmpReg);
2527 }
2528
2529 /// visitMul - Multiplies are not simple binary operators because they must deal
2530 /// with the EAX register explicitly.
2531 ///
2532 void X86ISel::visitMul(BinaryOperator &I) {
2533   unsigned ResultReg = getReg(I);
2534
2535   Value *Op0 = I.getOperand(0);
2536   Value *Op1 = I.getOperand(1);
2537
2538   // Fold loads into floating point multiplies.
2539   if (getClass(Op0->getType()) == cFP) {
2540     if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
2541       if (!I.swapOperands())
2542         std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
2543     if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
2544       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
2545         const Type *Ty = Op0->getType();
2546         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2547         unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
2548
2549         unsigned Op0r = getReg(Op0);
2550         if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
2551           unsigned FI = getFixedSizedAllocaFI(AI);
2552           addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), FI);
2553         } else {
2554           X86AddressMode AM;
2555           getAddressingMode(LI->getOperand(0), AM);
2556
2557           addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM);
2558         }
2559         return;
2560       }
2561   }
2562
2563   MachineBasicBlock::iterator IP = BB->end();
2564   emitMultiply(BB, IP, Op0, Op1, ResultReg);
2565 }
2566
2567 void X86ISel::emitMultiply(MachineBasicBlock *MBB,
2568                            MachineBasicBlock::iterator IP,
2569                            Value *Op0, Value *Op1, unsigned DestReg) {
2570   MachineBasicBlock &BB = *MBB;
2571   TypeClass Class = getClass(Op0->getType());
2572
2573   // Simple scalar multiply?
2574   unsigned Op0Reg  = getReg(Op0, &BB, IP);
2575   switch (Class) {
2576   case cByte:
2577   case cShort:
2578   case cInt:
2579     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
2580       unsigned Val = (unsigned)CI->getRawValue(); // Isn't a 64-bit constant
2581       doMultiplyConst(&BB, IP, DestReg, Op0->getType(), Op0Reg, Val);
2582     } else {
2583       unsigned Op1Reg  = getReg(Op1, &BB, IP);
2584       doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg);
2585     }
2586     return;
2587   case cFP:
2588     emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg);
2589     return;
2590   case cLong:
2591     break;
2592   }
2593
2594   // Long value.  We have to do things the hard way...
2595   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
2596     unsigned CLow = CI->getRawValue();
2597     unsigned CHi  = CI->getRawValue() >> 32;
2598
2599     if (CLow == 0) {
2600       // If the low part of the constant is all zeros, things are simple.
2601       BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
2602       doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi);
2603       return;
2604     }
2605
2606     // Multiply the two low parts... capturing carry into EDX
2607     unsigned OverflowReg = 0;
2608     if (CLow == 1) {
2609       BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
2610     } else {
2611       unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
2612       OverflowReg = makeAnotherReg(Type::UIntTy);
2613       BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
2614       BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
2615       BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL);  // AL*BL
2616
2617       BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);   // AL*BL
2618       BuildMI(BB, IP, X86::MOV32rr, 1,
2619               OverflowReg).addReg(X86::EDX);                    // AL*BL >> 32
2620     }
2621
2622     unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
2623     doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
2624
2625     unsigned AHBLplusOverflowReg;
2626     if (OverflowReg) {
2627       AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
2628       BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
2629               AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
2630     } else {
2631       AHBLplusOverflowReg = AHBLReg;
2632     }
2633
2634     if (CHi == 0) {
2635       BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
2636     } else {
2637       unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
2638       doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi);
2639
2640       BuildMI(BB, IP, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
2641               DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
2642     }
2643     return;
2644   }
2645
2646   // General 64x64 multiply
2647
2648   unsigned Op1Reg  = getReg(Op1, &BB, IP);
2649   // Multiply the two low parts... capturing carry into EDX
2650   BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
2651   BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg);  // AL*BL
2652
2653   unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
2654   BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);     // AL*BL
2655   BuildMI(BB, IP, X86::MOV32rr, 1,
2656           OverflowReg).addReg(X86::EDX); // AL*BL >> 32
2657
2658   unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
2659   BuildMI(BB, IP, X86::IMUL32rr, 2,
2660           AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
2661
2662   unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
2663   BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
2664           AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
2665
2666   unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
2667   BuildMI(BB, IP, X86::IMUL32rr, 2,
2668           ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
2669
2670   BuildMI(BB, IP, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
2671           DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
2672 }
2673
2674
2675 /// visitDivRem - Handle division and remainder instructions... these
2676 /// instruction both require the same instructions to be generated, they just
2677 /// select the result from a different register.  Note that both of these
2678 /// instructions work differently for signed and unsigned operands.
2679 ///
2680 void X86ISel::visitDivRem(BinaryOperator &I) {
2681   unsigned ResultReg = getReg(I);
2682   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2683
2684   // Fold loads into floating point divides.
2685   if (getClass(Op0->getType()) == cFP) {
2686     if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
2687       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
2688         const Type *Ty = Op0->getType();
2689         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2690         unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
2691
2692         unsigned Op0r = getReg(Op0);
2693         if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
2694           unsigned FI = getFixedSizedAllocaFI(AI);
2695           addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), FI);
2696         } else {
2697           X86AddressMode AM;
2698           getAddressingMode(LI->getOperand(0), AM);
2699
2700           addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM);
2701         }
2702         return;
2703       }
2704
2705     if (LoadInst *LI = dyn_cast<LoadInst>(Op0))
2706       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
2707         const Type *Ty = Op0->getType();
2708         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
2709         unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m;
2710
2711         unsigned Op1r = getReg(Op1);
2712         if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
2713           unsigned FI = getFixedSizedAllocaFI(AI);
2714           addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op1r), FI);
2715         } else {
2716           X86AddressMode AM;
2717           getAddressingMode(LI->getOperand(0), AM);
2718           addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op1r), AM);
2719         }
2720         return;
2721       }
2722   }
2723
2724
2725   MachineBasicBlock::iterator IP = BB->end();
2726   emitDivRemOperation(BB, IP, Op0, Op1,
2727                       I.getOpcode() == Instruction::Div, ResultReg);
2728 }
2729
2730 void X86ISel::emitDivRemOperation(MachineBasicBlock *BB,
2731                                   MachineBasicBlock::iterator IP,
2732                                   Value *Op0, Value *Op1, bool isDiv,
2733                                   unsigned ResultReg) {
2734   const Type *Ty = Op0->getType();
2735   unsigned Class = getClass(Ty);
2736   switch (Class) {
2737   case cFP:              // Floating point divide
2738     if (isDiv) {
2739       emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg);
2740       return;
2741     } else {               // Floating point remainder...
2742       unsigned Op0Reg = getReg(Op0, BB, IP);
2743       unsigned Op1Reg = getReg(Op1, BB, IP);
2744       MachineInstr *TheCall =
2745         BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
2746       std::vector<ValueRecord> Args;
2747       Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy));
2748       Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy));
2749       doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args);
2750     }
2751     return;
2752   case cLong: {
2753     static const char *FnName[] =
2754       { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
2755     unsigned Op0Reg = getReg(Op0, BB, IP);
2756     unsigned Op1Reg = getReg(Op1, BB, IP);
2757     unsigned NameIdx = Ty->isUnsigned()*2 + isDiv;
2758     MachineInstr *TheCall =
2759       BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
2760
2761     std::vector<ValueRecord> Args;
2762     Args.push_back(ValueRecord(Op0Reg, Type::LongTy));
2763     Args.push_back(ValueRecord(Op1Reg, Type::LongTy));
2764     doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args);
2765     return;
2766   }
2767   case cByte: case cShort: case cInt:
2768     break;          // Small integrals, handled below...
2769   default: assert(0 && "Unknown class!");
2770   }
2771
2772   static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
2773   static const unsigned NEGOpcode[]={ X86::NEG8r,  X86::NEG16r,  X86::NEG32r };
2774   static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
2775   static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri };
2776   static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
2777
2778   // Special case signed division by power of 2.
2779   if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1))
2780     if (isDiv) {
2781       assert(Class != cLong && "This doesn't handle 64-bit divides!");
2782       int V = CI->getValue();
2783
2784       if (V == 1) {       // X /s 1 => X
2785         unsigned Op0Reg = getReg(Op0, BB, IP);
2786         BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg);
2787         return;
2788       }
2789
2790       if (V == -1) {      // X /s -1 => -X
2791         unsigned Op0Reg = getReg(Op0, BB, IP);
2792         BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg);
2793         return;
2794       }
2795
2796       if (V == 2 || V == -2) {      // X /s 2
2797         static const unsigned CMPOpcode[] = {
2798           X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
2799         };
2800         static const unsigned SBBOpcode[] = {
2801           X86::SBB8ri, X86::SBB16ri, X86::SBB32ri
2802         };
2803         unsigned Op0Reg = getReg(Op0, BB, IP);
2804         unsigned SignBit = 1 << (CI->getType()->getPrimitiveSize()*8-1);
2805         BuildMI(*BB, IP, CMPOpcode[Class], 2).addReg(Op0Reg).addImm(SignBit);
2806
2807         unsigned TmpReg = makeAnotherReg(Op0->getType());
2808         BuildMI(*BB, IP, SBBOpcode[Class], 2, TmpReg).addReg(Op0Reg).addImm(-1);
2809
2810         unsigned TmpReg2 = V == 2 ? ResultReg : makeAnotherReg(Op0->getType());
2811         BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg2).addReg(TmpReg).addImm(1);
2812         if (V == -2) {
2813           BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg2);
2814         }
2815         return;
2816       }
2817
2818       bool isNeg = false;
2819       if (V < 0) {         // Not a positive power of 2?
2820         V = -V;
2821         isNeg = true;      // Maybe it's a negative power of 2.
2822       }
2823       if (unsigned Log = ExactLog2(V)) {
2824         --Log;
2825         unsigned Op0Reg = getReg(Op0, BB, IP);
2826         unsigned TmpReg = makeAnotherReg(Op0->getType());
2827         BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg)
2828           .addReg(Op0Reg).addImm(Log-1);
2829         unsigned TmpReg2 = makeAnotherReg(Op0->getType());
2830         BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2)
2831           .addReg(TmpReg).addImm(32-Log);
2832         unsigned TmpReg3 = makeAnotherReg(Op0->getType());
2833         BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3)
2834           .addReg(Op0Reg).addReg(TmpReg2);
2835
2836         unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg;
2837         BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4)
2838           .addReg(TmpReg3).addImm(Log);
2839         if (isNeg)
2840           BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4);
2841         return;
2842       }
2843     } else {    // X % C
2844       assert(Class != cLong && "This doesn't handle 64-bit remainder!");
2845       int V = CI->getValue();
2846
2847       if (V == 2 || V == -2) {       // X % 2, X % -2
2848         static const unsigned SExtOpcode[] = { X86::CBW, X86::CWD, X86::CDQ };
2849         static const unsigned BaseReg[]    = { X86::AL , X86::AX , X86::EAX };
2850         static const unsigned SExtReg[]    = { X86::AH , X86::DX , X86::EDX };
2851         static const unsigned ANDOpcode[]  = {
2852           X86::AND8ri, X86::AND16ri, X86::AND32ri
2853         };
2854         static const unsigned XOROpcode[]  = {
2855           X86::XOR8rr, X86::XOR16rr, X86::XOR32rr
2856         };
2857         static const unsigned SUBOpcode[]  = {
2858           X86::SUB8rr, X86::SUB16rr, X86::SUB32rr
2859         };
2860
2861         // Sign extend result into reg of -1 or 0.
2862         unsigned Op0Reg = getReg(Op0, BB, IP);
2863         BuildMI(*BB, IP, MovOpcode[Class], 1, BaseReg[Class]).addReg(Op0Reg);
2864         BuildMI(*BB, IP, SExtOpcode[Class], 0);
2865         unsigned TmpReg0 = makeAnotherReg(Op0->getType());
2866         BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg0).addReg(SExtReg[Class]);
2867
2868         unsigned TmpReg1 = makeAnotherReg(Op0->getType());
2869         BuildMI(*BB, IP, ANDOpcode[Class], 2, TmpReg1).addReg(Op0Reg).addImm(1);
2870
2871         unsigned TmpReg2 = makeAnotherReg(Op0->getType());
2872         BuildMI(*BB, IP, XOROpcode[Class], 2,
2873                 TmpReg2).addReg(TmpReg1).addReg(TmpReg0);
2874         BuildMI(*BB, IP, SUBOpcode[Class], 2,
2875                 ResultReg).addReg(TmpReg2).addReg(TmpReg0);
2876         return;
2877       }
2878     }
2879
2880   static const unsigned Regs[]     ={ X86::AL    , X86::AX     , X86::EAX     };
2881   static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
2882   static const unsigned ExtRegs[]  ={ X86::AH    , X86::DX     , X86::EDX     };
2883
2884   static const unsigned DivOpcode[][4] = {
2885     { X86::DIV8r , X86::DIV16r , X86::DIV32r , 0 },  // Unsigned division
2886     { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 },  // Signed division
2887   };
2888
2889   unsigned Reg    = Regs[Class];
2890   unsigned ExtReg = ExtRegs[Class];
2891
2892   // Put the first operand into one of the A registers...
2893   unsigned Op0Reg = getReg(Op0, BB, IP);
2894   unsigned Op1Reg = getReg(Op1, BB, IP);
2895   BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
2896
2897   if (Ty->isSigned()) {
2898     // Emit a sign extension instruction...
2899     unsigned ShiftResult = makeAnotherReg(Op0->getType());
2900     BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
2901     BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
2902
2903     // Emit the appropriate divide or remainder instruction...
2904     BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg);
2905   } else {
2906     // If unsigned, emit a zeroing instruction... (reg = 0)
2907     BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0);
2908
2909     // Emit the appropriate divide or remainder instruction...
2910     BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg);
2911   }
2912
2913   // Figure out which register we want to pick the result out of...
2914   unsigned DestReg = isDiv ? Reg : ExtReg;
2915
2916   // Put the result into the destination register...
2917   BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg);
2918 }
2919
2920
2921 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here
2922 /// for constant immediate shift values, and for constant immediate
2923 /// shift values equal to 1. Even the general case is sort of special,
2924 /// because the shift amount has to be in CL, not just any old register.
2925 ///
2926 void X86ISel::visitShiftInst(ShiftInst &I) {
2927   MachineBasicBlock::iterator IP = BB->end ();
2928   emitShiftOperation (BB, IP, I.getOperand (0), I.getOperand (1),
2929                       I.getOpcode () == Instruction::Shl, I.getType (),
2930                       getReg (I));
2931 }
2932
2933 /// Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
2934 /// constant.
2935 void X86ISel::doSHLDConst(MachineBasicBlock *MBB,
2936                           MachineBasicBlock::iterator IP,
2937                           unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
2938                           unsigned Amt) {
2939   // SHLD is a very inefficient operation on every processor, try to do
2940   // somethign simpler for common values of 'Amt'.
2941   if (Amt == 0) {
2942     BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
2943   } else if (Amt == 1) {
2944     unsigned Tmp = makeAnotherReg(Type::UIntTy);
2945     BuildMI(*MBB, IP, X86::ADD32rr, 2, Tmp).addReg(Op1Reg).addReg(Op1Reg);
2946     BuildMI(*MBB, IP, X86::ADC32rr, 2, DestReg).addReg(Op0Reg).addReg(Op0Reg);
2947   } else if (Amt == 2 || Amt == 3) {
2948     // On the P4 and Athlon it is cheaper to replace shld ..., 2|3 with a
2949     // shift/lea pair.  NOTE: This should not be done on the P6 family!
2950     unsigned Tmp = makeAnotherReg(Type::UIntTy);
2951     BuildMI(*MBB, IP, X86::SHR32ri, 2, Tmp).addReg(Op1Reg).addImm(32-Amt);
2952     X86AddressMode AM;
2953     AM.BaseType = X86AddressMode::RegBase;
2954     AM.Base.Reg = Tmp;
2955     AM.Scale = 1 << Amt;
2956     AM.IndexReg = Op0Reg;
2957     AM.Disp = 0;
2958     addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 4, DestReg), AM);
2959   } else {
2960     // NOTE: It is always cheaper on the P4 to emit SHLD as two shifts and an OR
2961     // than it is to emit a real SHLD.
2962
2963     BuildMI(*MBB, IP, X86::SHLD32rri8, 3,
2964             DestReg).addReg(Op0Reg).addReg(Op1Reg).addImm(Amt);
2965   }
2966 }
2967
2968 /// emitShiftOperation - Common code shared between visitShiftInst and
2969 /// constant expression support.
2970 void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
2971                                  MachineBasicBlock::iterator IP,
2972                                  Value *Op, Value *ShiftAmount,
2973                                  bool isLeftShift, const Type *ResultTy,
2974                                  unsigned DestReg) {
2975   unsigned SrcReg = getReg (Op, MBB, IP);
2976   bool isSigned = ResultTy->isSigned ();
2977   unsigned Class = getClass (ResultTy);
2978
2979   static const unsigned ConstantOperand[][3] = {
2980     { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri },  // SHR
2981     { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri },  // SAR
2982     { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri },  // SHL
2983     { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri },  // SAL = SHL
2984   };
2985
2986   static const unsigned NonConstantOperand[][3] = {
2987     { X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL },  // SHR
2988     { X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL },  // SAR
2989     { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SHL
2990     { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SAL = SHL
2991   };
2992
2993   // Longs, as usual, are handled specially.
2994   if (Class == cLong) {
2995     if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
2996       unsigned Amount = CUI->getValue();
2997       if (Amount == 1 && isLeftShift) {   // X << 1 == X+X
2998         BuildMI(*MBB, IP, X86::ADD32rr, 2,
2999                 DestReg).addReg(SrcReg).addReg(SrcReg);
3000         BuildMI(*MBB, IP, X86::ADC32rr, 2,
3001                 DestReg+1).addReg(SrcReg+1).addReg(SrcReg+1);
3002       } else if (Amount < 32) {
3003         const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
3004         if (isLeftShift) {
3005           doSHLDConst(MBB, IP, DestReg+1, SrcReg+1, SrcReg, Amount);
3006           BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount);
3007         } else {
3008           BuildMI(*MBB, IP, X86::SHRD32rri8, 3,
3009                   DestReg).addReg(SrcReg  ).addReg(SrcReg+1).addImm(Amount);
3010           BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount);
3011         }
3012       } else if (Amount == 32) {
3013         if (isLeftShift) {
3014           BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
3015           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
3016         } else {
3017           BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
3018           if (!isSigned) {
3019             BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
3020           } else {
3021             BuildMI(*MBB, IP, X86::SAR32ri, 2,
3022                     DestReg+1).addReg(SrcReg).addImm(31);
3023           }
3024         }
3025       } else {                 // Shifting more than 32 bits
3026         Amount -= 32;
3027         if (isLeftShift) {
3028           BuildMI(*MBB, IP, X86::SHL32ri, 2,
3029                   DestReg + 1).addReg(SrcReg).addImm(Amount);
3030           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
3031         } else {
3032           BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
3033                   DestReg).addReg(SrcReg+1).addImm(Amount);
3034           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
3035         }
3036       }
3037     } else {
3038       unsigned TmpReg = makeAnotherReg(Type::IntTy);
3039       if (!isLeftShift && isSigned) {
3040         // If this is a SHR of a Long, then we need to do funny sign extension
3041         // stuff.  TmpReg gets the value to use as the high-part if we are
3042         // shifting more than 32 bits.
3043         BuildMI(*MBB, IP, X86::SAR32ri, 2, TmpReg).addReg(SrcReg).addImm(31);
3044       } else {
3045         // Other shifts use a fixed zero value if the shift is more than 32
3046         // bits.
3047         BuildMI(*MBB, IP, X86::MOV32ri, 1, TmpReg).addImm(0);
3048       }
3049
3050       // Initialize CL with the shift amount...
3051       unsigned ShiftAmountReg = getReg(ShiftAmount, MBB, IP);
3052       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
3053
3054       unsigned TmpReg2 = makeAnotherReg(Type::IntTy);
3055       unsigned TmpReg3 = makeAnotherReg(Type::IntTy);
3056       if (isLeftShift) {
3057         // TmpReg2 = shld inHi, inLo
3058         BuildMI(*MBB, IP, X86::SHLD32rrCL,2,TmpReg2).addReg(SrcReg+1)
3059                                                     .addReg(SrcReg);
3060         // TmpReg3 = shl  inLo, CL
3061         BuildMI(*MBB, IP, X86::SHL32rCL, 1, TmpReg3).addReg(SrcReg);
3062
3063         // Set the flags to indicate whether the shift was by more than 32 bits.
3064         BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
3065
3066         // DestHi = (>32) ? TmpReg3 : TmpReg2;
3067         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
3068                 DestReg+1).addReg(TmpReg2).addReg(TmpReg3);
3069         // DestLo = (>32) ? TmpReg : TmpReg3;
3070         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
3071             DestReg).addReg(TmpReg3).addReg(TmpReg);
3072       } else {
3073         // TmpReg2 = shrd inLo, inHi
3074         BuildMI(*MBB, IP, X86::SHRD32rrCL,2,TmpReg2).addReg(SrcReg)
3075                                                     .addReg(SrcReg+1);
3076         // TmpReg3 = s[ah]r  inHi, CL
3077         BuildMI(*MBB, IP, isSigned ? X86::SAR32rCL : X86::SHR32rCL, 1, TmpReg3)
3078                        .addReg(SrcReg+1);
3079
3080         // Set the flags to indicate whether the shift was by more than 32 bits.
3081         BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
3082
3083         // DestLo = (>32) ? TmpReg3 : TmpReg2;
3084         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
3085                 DestReg).addReg(TmpReg2).addReg(TmpReg3);
3086
3087         // DestHi = (>32) ? TmpReg : TmpReg3;
3088         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
3089                 DestReg+1).addReg(TmpReg3).addReg(TmpReg);
3090       }
3091     }
3092     return;
3093   }
3094
3095   if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
3096     // The shift amount is constant, guaranteed to be a ubyte. Get its value.
3097     assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
3098
3099     if (CUI->getValue() == 1 && isLeftShift) {    // X << 1 -> X+X
3100       static const int AddOpC[] = { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
3101       BuildMI(*MBB, IP, AddOpC[Class], 2,DestReg).addReg(SrcReg).addReg(SrcReg);
3102     } else {
3103       const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
3104       BuildMI(*MBB, IP, Opc[Class], 2,
3105               DestReg).addReg(SrcReg).addImm(CUI->getValue());
3106     }
3107   } else {                  // The shift amount is non-constant.
3108     unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP);
3109     BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
3110
3111     const unsigned *Opc = NonConstantOperand[isLeftShift*2+isSigned];
3112     BuildMI(*MBB, IP, Opc[Class], 1, DestReg).addReg(SrcReg);
3113   }
3114 }
3115
3116
3117 /// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
3118 /// instruction.  The load and store instructions are the only place where we
3119 /// need to worry about the memory layout of the target machine.
3120 ///
3121 void X86ISel::visitLoadInst(LoadInst &I) {
3122   // Check to see if this load instruction is going to be folded into a binary
3123   // instruction, like add.  If so, we don't want to emit it.  Wouldn't a real
3124   // pattern matching instruction selector be nice?
3125   unsigned Class = getClassB(I.getType());
3126   if (I.hasOneUse()) {
3127     Instruction *User = cast<Instruction>(I.use_back());
3128     switch (User->getOpcode()) {
3129     case Instruction::Cast:
3130       // If this is a cast from a signed-integer type to a floating point type,
3131       // fold the cast here.
3132       if (getClassB(User->getType()) == cFP &&
3133           (I.getType() == Type::ShortTy || I.getType() == Type::IntTy ||
3134            I.getType() == Type::LongTy)) {
3135         unsigned DestReg = getReg(User);
3136         static const unsigned Opcode[] = {
3137           0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m
3138         };
3139
3140         if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) {
3141           unsigned FI = getFixedSizedAllocaFI(AI);
3142           addFrameReference(BuildMI(BB, Opcode[Class], 4, DestReg), FI);
3143         } else {
3144           X86AddressMode AM;
3145           getAddressingMode(I.getOperand(0), AM);
3146           addFullAddress(BuildMI(BB, Opcode[Class], 4, DestReg), AM);
3147         }
3148         return;
3149       } else {
3150         User = 0;
3151       }
3152       break;
3153
3154     case Instruction::Add:
3155     case Instruction::Sub:
3156     case Instruction::And:
3157     case Instruction::Or:
3158     case Instruction::Xor:
3159       if (Class == cLong) User = 0;
3160       break;
3161     case Instruction::Mul:
3162     case Instruction::Div:
3163       if (Class != cFP) User = 0;
3164       break;  // Folding only implemented for floating point.
3165     default: User = 0; break;
3166     }
3167
3168     if (User) {
3169       // Okay, we found a user.  If the load is the first operand and there is
3170       // no second operand load, reverse the operand ordering.  Note that this
3171       // can fail for a subtract (ie, no change will be made).
3172       bool Swapped = false;
3173       if (!isa<LoadInst>(User->getOperand(1)))
3174         Swapped = !cast<BinaryOperator>(User)->swapOperands();
3175
3176       // Okay, now that everything is set up, if this load is used by the second
3177       // operand, and if there are no instructions that invalidate the load
3178       // before the binary operator, eliminate the load.
3179       if (User->getOperand(1) == &I &&
3180           isSafeToFoldLoadIntoInstruction(I, *User))
3181         return;   // Eliminate the load!
3182
3183       // If this is a floating point sub or div, we won't be able to swap the
3184       // operands, but we will still be able to eliminate the load.
3185       if (Class == cFP && User->getOperand(0) == &I &&
3186           !isa<LoadInst>(User->getOperand(1)) &&
3187           (User->getOpcode() == Instruction::Sub ||
3188            User->getOpcode() == Instruction::Div) &&
3189           isSafeToFoldLoadIntoInstruction(I, *User))
3190         return;  // Eliminate the load!
3191
3192       // If we swapped the operands to the instruction, but couldn't fold the
3193       // load anyway, swap them back.  We don't want to break add X, int
3194       // folding.
3195       if (Swapped) cast<BinaryOperator>(User)->swapOperands();
3196     }
3197   }
3198
3199   static const unsigned Opcodes[] = {
3200     X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD32m, X86::MOV32rm
3201   };
3202   unsigned Opcode = Opcodes[Class];
3203   if (I.getType() == Type::DoubleTy) Opcode = X86::FLD64m;
3204
3205   unsigned DestReg = getReg(I);
3206
3207   if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) {
3208     unsigned FI = getFixedSizedAllocaFI(AI);
3209     if (Class == cLong) {
3210       addFrameReference(BuildMI(BB, X86::MOV32rm, 4, DestReg), FI);
3211       addFrameReference(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), FI, 4);
3212     } else {
3213       addFrameReference(BuildMI(BB, Opcode, 4, DestReg), FI);
3214     }
3215   } else {
3216     X86AddressMode AM;
3217     getAddressingMode(I.getOperand(0), AM);
3218
3219     if (Class == cLong) {
3220       addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg), AM);
3221       AM.Disp += 4;
3222       addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), AM);
3223     } else {
3224       addFullAddress(BuildMI(BB, Opcode, 4, DestReg), AM);
3225     }
3226   }
3227 }
3228
3229 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
3230 /// instruction.
3231 ///
3232 void X86ISel::visitStoreInst(StoreInst &I) {
3233   X86AddressMode AM;
3234   getAddressingMode(I.getOperand(1), AM);
3235
3236   const Type *ValTy = I.getOperand(0)->getType();
3237   unsigned Class = getClassB(ValTy);
3238
3239   if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
3240     uint64_t Val = CI->getRawValue();
3241     if (Class == cLong) {
3242       addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(Val & ~0U);
3243       AM.Disp += 4;
3244       addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(Val>>32);
3245     } else {
3246       static const unsigned Opcodes[] = {
3247         X86::MOV8mi, X86::MOV16mi, X86::MOV32mi
3248       };
3249       unsigned Opcode = Opcodes[Class];
3250       addFullAddress(BuildMI(BB, Opcode, 5), AM).addImm(Val);
3251     }
3252   } else if (isa<ConstantPointerNull>(I.getOperand(0))) {
3253     addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(0);
3254   } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
3255     addFullAddress(BuildMI(BB, X86::MOV8mi, 5), AM).addImm(CB->getValue());
3256   } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) {
3257     // Store constant FP values with integer instructions to avoid having to
3258     // load the constants from the constant pool then do a store.
3259     if (CFP->getType() == Type::FloatTy) {
3260       union {
3261         unsigned I;
3262         float    F;
3263       } V;
3264       V.F = CFP->getValue();
3265       addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(V.I);
3266     } else {
3267       union {
3268         uint64_t I;
3269         double   F;
3270       } V;
3271       V.F = CFP->getValue();
3272       addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm((unsigned)V.I);
3273       AM.Disp += 4;
3274       addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(
3275                                                           unsigned(V.I >> 32));
3276     }
3277
3278   } else if (Class == cLong) {
3279     unsigned ValReg = getReg(I.getOperand(0));
3280     addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg);
3281     AM.Disp += 4;
3282     addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg+1);
3283   } else {
3284     // FIXME: stop emitting these two instructions:
3285     //    movl $global,%eax
3286     //    movl %eax,(%ebx)
3287     // when one instruction will suffice.  That includes when the global
3288     // has an offset applied to it.
3289     unsigned ValReg = getReg(I.getOperand(0));
3290     static const unsigned Opcodes[] = {
3291       X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST32m
3292     };
3293     unsigned Opcode = Opcodes[Class];
3294     if (ValTy == Type::DoubleTy) Opcode = X86::FST64m;
3295
3296     addFullAddress(BuildMI(BB, Opcode, 1+4), AM).addReg(ValReg);
3297   }
3298 }
3299
3300
3301 /// visitCastInst - Here we have various kinds of copying with or without sign
3302 /// extension going on.
3303 ///
3304 void X86ISel::visitCastInst(CastInst &CI) {
3305   Value *Op = CI.getOperand(0);
3306
3307   unsigned SrcClass = getClassB(Op->getType());
3308   unsigned DestClass = getClassB(CI.getType());
3309   // Noop casts are not emitted: getReg will return the source operand as the
3310   // register to use for any uses of the noop cast.
3311   if (DestClass == SrcClass) {
3312     // The only detail in this plan is that casts from double -> float are
3313     // truncating operations that we have to codegen through memory (despite
3314     // the fact that the source/dest registers are the same class).
3315     if (CI.getType() != Type::FloatTy || Op->getType() != Type::DoubleTy)
3316       return;
3317   }
3318
3319   // If this is a cast from a 32-bit integer to a Long type, and the only uses
3320   // of the case are GEP instructions, then the cast does not need to be
3321   // generated explicitly, it will be folded into the GEP.
3322   if (DestClass == cLong && SrcClass == cInt) {
3323     bool AllUsesAreGEPs = true;
3324     for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I)
3325       if (!isa<GetElementPtrInst>(*I)) {
3326         AllUsesAreGEPs = false;
3327         break;
3328       }
3329
3330     // No need to codegen this cast if all users are getelementptr instrs...
3331     if (AllUsesAreGEPs) return;
3332   }
3333
3334   // If this cast converts a load from a short,int, or long integer to a FP
3335   // value, we will have folded this cast away.
3336   if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() &&
3337       (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy ||
3338        Op->getType() == Type::LongTy))
3339     return;
3340
3341
3342   unsigned DestReg = getReg(CI);
3343   MachineBasicBlock::iterator MI = BB->end();
3344   emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
3345 }
3346
3347 /// emitCastOperation - Common code shared between visitCastInst and constant
3348 /// expression cast support.
3349 ///
3350 void X86ISel::emitCastOperation(MachineBasicBlock *BB,
3351                                 MachineBasicBlock::iterator IP,
3352                                 Value *Src, const Type *DestTy,
3353                                 unsigned DestReg) {
3354   const Type *SrcTy = Src->getType();
3355   unsigned SrcClass = getClassB(SrcTy);
3356   unsigned DestClass = getClassB(DestTy);
3357   unsigned SrcReg = getReg(Src, BB, IP);
3358
3359   // Implement casts to bool by using compare on the operand followed by set if
3360   // not zero on the result.
3361   if (DestTy == Type::BoolTy) {
3362     switch (SrcClass) {
3363     case cByte:
3364       BuildMI(*BB, IP, X86::TEST8rr, 2).addReg(SrcReg).addReg(SrcReg);
3365       break;
3366     case cShort:
3367       BuildMI(*BB, IP, X86::TEST16rr, 2).addReg(SrcReg).addReg(SrcReg);
3368       break;
3369     case cInt:
3370       BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg).addReg(SrcReg);
3371       break;
3372     case cLong: {
3373       unsigned TmpReg = makeAnotherReg(Type::IntTy);
3374       BuildMI(*BB, IP, X86::OR32rr, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1);
3375       break;
3376     }
3377     case cFP:
3378       BuildMI(*BB, IP, X86::FTST, 1).addReg(SrcReg);
3379       BuildMI(*BB, IP, X86::FNSTSW8r, 0);
3380       BuildMI(*BB, IP, X86::SAHF, 1);
3381       break;
3382     }
3383
3384     // If the zero flag is not set, then the value is true, set the byte to
3385     // true.
3386     BuildMI(*BB, IP, X86::SETNEr, 1, DestReg);
3387     return;
3388   }
3389
3390   static const unsigned RegRegMove[] = {
3391     X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
3392   };
3393
3394   // Implement casts between values of the same type class (as determined by
3395   // getClass) by using a register-to-register move.
3396   if (SrcClass == DestClass) {
3397     if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) {
3398       BuildMI(*BB, IP, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
3399     } else if (SrcClass == cFP) {
3400       if (SrcTy == Type::FloatTy) {  // double -> float
3401         assert(DestTy == Type::DoubleTy && "Unknown cFP member!");
3402         BuildMI(*BB, IP, X86::FpMOV, 1, DestReg).addReg(SrcReg);
3403       } else {                       // float -> double
3404         assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy &&
3405                "Unknown cFP member!");
3406         // Truncate from double to float by storing to memory as short, then
3407         // reading it back.
3408         unsigned FltAlign = TM.getTargetData().getFloatAlignment();
3409         int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
3410         addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5), FrameIdx).addReg(SrcReg);
3411         addFrameReference(BuildMI(*BB, IP, X86::FLD32m, 5, DestReg), FrameIdx);
3412       }
3413     } else if (SrcClass == cLong) {
3414       BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
3415       BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg+1);
3416     } else {
3417       assert(0 && "Cannot handle this type of cast instruction!");
3418       abort();
3419     }
3420     return;
3421   }
3422
3423   // Handle cast of SMALLER int to LARGER int using a move with sign extension
3424   // or zero extension, depending on whether the source type was signed.
3425   if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) &&
3426       SrcClass < DestClass) {
3427     bool isLong = DestClass == cLong;
3428     if (isLong) DestClass = cInt;
3429
3430     static const unsigned Opc[][4] = {
3431       { X86::MOVSX16rr8, X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOV32rr }, // s
3432       { X86::MOVZX16rr8, X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOV32rr }  // u
3433     };
3434
3435     bool isUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy;
3436     BuildMI(*BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
3437         DestReg).addReg(SrcReg);
3438
3439     if (isLong) {  // Handle upper 32 bits as appropriate...
3440       if (isUnsigned)     // Zero out top bits...
3441         BuildMI(*BB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
3442       else                // Sign extend bottom half...
3443         BuildMI(*BB, IP, X86::SAR32ri, 2, DestReg+1).addReg(DestReg).addImm(31);
3444     }
3445     return;
3446   }
3447
3448   // Special case long -> int ...
3449   if (SrcClass == cLong && DestClass == cInt) {
3450     BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
3451     return;
3452   }
3453
3454   // Handle cast of LARGER int to SMALLER int using a move to EAX followed by a
3455   // move out of AX or AL.
3456   if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt
3457       && SrcClass > DestClass) {
3458     static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX, 0, X86::EAX };
3459     BuildMI(*BB, IP, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
3460     BuildMI(*BB, IP, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
3461     return;
3462   }
3463
3464   // Handle casts from integer to floating point now...
3465   if (DestClass == cFP) {
3466     // Promote the integer to a type supported by FLD.  We do this because there
3467     // are no unsigned FLD instructions, so we must promote an unsigned value to
3468     // a larger signed value, then use FLD on the larger value.
3469     //
3470     const Type *PromoteType = 0;
3471     unsigned PromoteOpcode = 0;
3472     unsigned RealDestReg = DestReg;
3473     switch (SrcTy->getTypeID()) {
3474     case Type::BoolTyID:
3475     case Type::SByteTyID:
3476       // We don't have the facilities for directly loading byte sized data from
3477       // memory (even signed).  Promote it to 16 bits.
3478       PromoteType = Type::ShortTy;
3479       PromoteOpcode = X86::MOVSX16rr8;
3480       break;
3481     case Type::UByteTyID:
3482       PromoteType = Type::ShortTy;
3483       PromoteOpcode = X86::MOVZX16rr8;
3484       break;
3485     case Type::UShortTyID:
3486       PromoteType = Type::IntTy;
3487       PromoteOpcode = X86::MOVZX32rr16;
3488       break;
3489     case Type::ULongTyID:
3490     case Type::UIntTyID:
3491       // Don't fild into the read destination.
3492       DestReg = makeAnotherReg(Type::DoubleTy);
3493       break;
3494     default:  // No promotion needed...
3495       break;
3496     }
3497
3498     if (PromoteType) {
3499       unsigned TmpReg = makeAnotherReg(PromoteType);
3500       BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg);
3501       SrcTy = PromoteType;
3502       SrcClass = getClass(PromoteType);
3503       SrcReg = TmpReg;
3504     }
3505
3506     // Spill the integer to memory and reload it from there...
3507     int FrameIdx =
3508       F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
3509
3510     if (SrcClass == cLong) {
3511       addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
3512                         FrameIdx).addReg(SrcReg);
3513       addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
3514                         FrameIdx, 4).addReg(SrcReg+1);
3515     } else {
3516       static const unsigned Op1[] = { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr };
3517       addFrameReference(BuildMI(*BB, IP, Op1[SrcClass], 5),
3518                         FrameIdx).addReg(SrcReg);
3519     }
3520
3521     static const unsigned Op2[] =
3522       { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
3523     addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
3524
3525     if (SrcTy == Type::UIntTy) {
3526       // If this is a cast from uint -> double, we need to be careful about if
3527       // the "sign" bit is set.  If so, we don't want to make a negative number,
3528       // we want to make a positive number.  Emit code to add an offset if the
3529       // sign bit is set.
3530
3531       // Compute whether the sign bit is set by shifting the reg right 31 bits.
3532       unsigned IsNeg = makeAnotherReg(Type::IntTy);
3533       BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
3534
3535       // Create a CP value that has the offset in one word and 0 in the other.
3536       static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
3537                                                         0x4f80000000000000ULL);
3538       unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
3539       BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
3540         .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
3541
3542     } else if (SrcTy == Type::ULongTy) {
3543       // We need special handling for unsigned 64-bit integer sources.  If the
3544       // input number has the "sign bit" set, then we loaded it incorrectly as a
3545       // negative 64-bit number.  In this case, add an offset value.
3546
3547       // Emit a test instruction to see if the dynamic input value was signed.
3548       BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);
3549
3550       // If the sign bit is set, get a pointer to an offset, otherwise get a
3551       // pointer to a zero.
3552       MachineConstantPool *CP = F->getConstantPool();
3553       unsigned Zero = makeAnotherReg(Type::IntTy);
3554       Constant *Null = Constant::getNullValue(Type::UIntTy);
3555       addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero),
3556                                CP->getConstantPoolIndex(Null));
3557       unsigned Offset = makeAnotherReg(Type::IntTy);
3558       Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
3559
3560       addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Offset),
3561                                CP->getConstantPoolIndex(OffsetCst));
3562       unsigned Addr = makeAnotherReg(Type::IntTy);
3563       BuildMI(*BB, IP, X86::CMOVS32rr, 2, Addr).addReg(Zero).addReg(Offset);
3564
3565       // Load the constant for an add.  FIXME: this could make an 'fadd' that
3566       // reads directly from memory, but we don't support these yet.
3567       unsigned ConstReg = makeAnotherReg(Type::DoubleTy);
3568       addDirectMem(BuildMI(*BB, IP, X86::FLD32m, 4, ConstReg), Addr);
3569
3570       BuildMI(*BB, IP, X86::FpADD, 2, RealDestReg)
3571                 .addReg(ConstReg).addReg(DestReg);
3572     }
3573
3574     return;
3575   }
3576
3577   // Handle casts from floating point to integer now...
3578   if (SrcClass == cFP) {
3579     // Change the floating point control register to use "round towards zero"
3580     // mode when truncating to an integer value.
3581     //
3582     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
3583     addFrameReference(BuildMI(*BB, IP, X86::FNSTCW16m, 4), CWFrameIdx);
3584
3585     // Load the old value of the high byte of the control word...
3586     unsigned HighPartOfCW = makeAnotherReg(Type::UByteTy);
3587     addFrameReference(BuildMI(*BB, IP, X86::MOV8rm, 4, HighPartOfCW),
3588                       CWFrameIdx, 1);
3589
3590     // Set the high part to be round to zero...
3591     addFrameReference(BuildMI(*BB, IP, X86::MOV8mi, 5),
3592                       CWFrameIdx, 1).addImm(12);
3593
3594     // Reload the modified control word now...
3595     addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
3596
3597     // Restore the memory image of control word to original value
3598     addFrameReference(BuildMI(*BB, IP, X86::MOV8mr, 5),
3599                       CWFrameIdx, 1).addReg(HighPartOfCW);
3600
3601     // We don't have the facilities for directly storing byte sized data to
3602     // memory.  Promote it to 16 bits.  We also must promote unsigned values to
3603     // larger classes because we only have signed FP stores.
3604     unsigned StoreClass  = DestClass;
3605     const Type *StoreTy  = DestTy;
3606     if (StoreClass == cByte || DestTy->isUnsigned())
3607       switch (StoreClass) {
3608       case cByte:  StoreTy = Type::ShortTy; StoreClass = cShort; break;
3609       case cShort: StoreTy = Type::IntTy;   StoreClass = cInt;   break;
3610       case cInt:   StoreTy = Type::LongTy;  StoreClass = cLong;  break;
3611       // The following treatment of cLong may not be perfectly right,
3612       // but it survives chains of casts of the form
3613       // double->ulong->double.
3614       case cLong:  StoreTy = Type::LongTy;  StoreClass = cLong;  break;
3615       default: assert(0 && "Unknown store class!");
3616       }
3617
3618     // Spill the integer to memory and reload it from there...
3619     int FrameIdx =
3620       F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData());
3621
3622     static const unsigned Op1[] =
3623       { 0, X86::FIST16m, X86::FIST32m, 0, X86::FISTP64m };
3624     addFrameReference(BuildMI(*BB, IP, Op1[StoreClass], 5),
3625                       FrameIdx).addReg(SrcReg);
3626
3627     if (DestClass == cLong) {
3628       addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg), FrameIdx);
3629       addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg+1),
3630                         FrameIdx, 4);
3631     } else {
3632       static const unsigned Op2[] = { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm };
3633       addFrameReference(BuildMI(*BB, IP, Op2[DestClass], 4, DestReg), FrameIdx);
3634     }
3635
3636     // Reload the original control word now...
3637     addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
3638     return;
3639   }
3640
3641   // Anything we haven't handled already, we can't (yet) handle at all.
3642   assert(0 && "Unhandled cast instruction!");
3643   abort();
3644 }
3645
3646 /// visitVANextInst - Implement the va_next instruction...
3647 ///
3648 void X86ISel::visitVANextInst(VANextInst &I) {
3649   unsigned VAList = getReg(I.getOperand(0));
3650   unsigned DestReg = getReg(I);
3651
3652   unsigned Size;
3653   switch (I.getArgType()->getTypeID()) {
3654   default:
3655     std::cerr << I;
3656     assert(0 && "Error: bad type for va_next instruction!");
3657     return;
3658   case Type::PointerTyID:
3659   case Type::UIntTyID:
3660   case Type::IntTyID:
3661     Size = 4;
3662     break;
3663   case Type::ULongTyID:
3664   case Type::LongTyID:
3665   case Type::DoubleTyID:
3666     Size = 8;
3667     break;
3668   }
3669
3670   // Increment the VAList pointer...
3671   BuildMI(BB, X86::ADD32ri, 2, DestReg).addReg(VAList).addImm(Size);
3672 }
3673
3674 void X86ISel::visitVAArgInst(VAArgInst &I) {
3675   unsigned VAList = getReg(I.getOperand(0));
3676   unsigned DestReg = getReg(I);
3677
3678   switch (I.getType()->getTypeID()) {
3679   default:
3680     std::cerr << I;
3681     assert(0 && "Error: bad type for va_next instruction!");
3682     return;
3683   case Type::PointerTyID:
3684   case Type::UIntTyID:
3685   case Type::IntTyID:
3686     addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
3687     break;
3688   case Type::ULongTyID:
3689   case Type::LongTyID:
3690     addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
3691     addRegOffset(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), VAList, 4);
3692     break;
3693   case Type::DoubleTyID:
3694     addDirectMem(BuildMI(BB, X86::FLD64m, 4, DestReg), VAList);
3695     break;
3696   }
3697 }
3698
3699 /// visitGetElementPtrInst - instruction-select GEP instructions
3700 ///
3701 void X86ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
3702   // If this GEP instruction will be folded into all of its users, we don't need
3703   // to explicitly calculate it!
3704   X86AddressMode AM;
3705   if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), AM)) {
3706     // Check all of the users of the instruction to see if they are loads and
3707     // stores.
3708     bool AllWillFold = true;
3709     for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
3710       if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
3711         if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
3712             cast<Instruction>(*UI)->getOperand(0) == &I) {
3713           AllWillFold = false;
3714           break;
3715         }
3716
3717     // If the instruction is foldable, and will be folded into all users, don't
3718     // emit it!
3719     if (AllWillFold) return;
3720   }
3721
3722   unsigned outputReg = getReg(I);
3723   emitGEPOperation(BB, BB->end(), I.getOperand(0),
3724                    I.op_begin()+1, I.op_end(), outputReg);
3725 }
3726
3727 /// getGEPIndex - Inspect the getelementptr operands specified with GEPOps and
3728 /// GEPTypes (the derived types being stepped through at each level).  On return
3729 /// from this function, if some indexes of the instruction are representable as
3730 /// an X86 lea instruction, the machine operands are put into the Ops
3731 /// instruction and the consumed indexes are poped from the GEPOps/GEPTypes
3732 /// lists.  Otherwise, GEPOps.size() is returned.  If this returns a an
3733 /// addressing mode that only partially consumes the input, the BaseReg input of
3734 /// the addressing mode must be left free.
3735 ///
3736 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
3737 ///
3738 void X86ISel::getGEPIndex(MachineBasicBlock *MBB,
3739                           MachineBasicBlock::iterator IP,
3740                           std::vector<Value*> &GEPOps,
3741                           std::vector<const Type*> &GEPTypes,
3742                           X86AddressMode &AM) {
3743   const TargetData &TD = TM.getTargetData();
3744
3745   // Clear out the state we are working with...
3746   AM.BaseType = X86AddressMode::RegBase;
3747   AM.Base.Reg = 0;   // No base register
3748   AM.Scale = 1;      // Unit scale
3749   AM.IndexReg = 0;   // No index register
3750   AM.Disp = 0;       // No displacement
3751
3752   // While there are GEP indexes that can be folded into the current address,
3753   // keep processing them.
3754   while (!GEPTypes.empty()) {
3755     if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
3756       // It's a struct access.  CUI is the index into the structure,
3757       // which names the field. This index must have unsigned type.
3758       const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
3759
3760       // Use the TargetData structure to pick out what the layout of the
3761       // structure is in memory.  Since the structure index must be constant, we
3762       // can get its value and use it to find the right byte offset from the
3763       // StructLayout class's list of structure member offsets.
3764       AM.Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
3765       GEPOps.pop_back();        // Consume a GEP operand
3766       GEPTypes.pop_back();
3767     } else {
3768       // It's an array or pointer access: [ArraySize x ElementType].
3769       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
3770       Value *idx = GEPOps.back();
3771
3772       // idx is the index into the array.  Unlike with structure
3773       // indices, we may not know its actual value at code-generation
3774       // time.
3775
3776       // If idx is a constant, fold it into the offset.
3777       unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
3778       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
3779         AM.Disp += TypeSize*CSI->getValue();
3780       } else if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(idx)) {
3781         AM.Disp += TypeSize*CUI->getValue();
3782       } else {
3783         // If the index reg is already taken, we can't handle this index.
3784         if (AM.IndexReg) return;
3785
3786         // If this is a size that we can handle, then add the index as
3787         switch (TypeSize) {
3788         case 1: case 2: case 4: case 8:
3789           // These are all acceptable scales on X86.
3790           AM.Scale = TypeSize;
3791           break;
3792         default:
3793           // Otherwise, we can't handle this scale
3794           return;
3795         }
3796
3797         if (CastInst *CI = dyn_cast<CastInst>(idx))
3798           if (CI->getOperand(0)->getType() == Type::IntTy ||
3799               CI->getOperand(0)->getType() == Type::UIntTy)
3800             idx = CI->getOperand(0);
3801
3802         AM.IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
3803       }
3804
3805       GEPOps.pop_back();        // Consume a GEP operand
3806       GEPTypes.pop_back();
3807     }
3808   }
3809
3810   // GEPTypes is empty, which means we have a single operand left.  Set it as
3811   // the base register.
3812   //
3813   assert(AM.Base.Reg == 0);
3814
3815   if (AllocaInst *AI = dyn_castFixedAlloca(GEPOps.back())) {
3816     AM.BaseType = X86AddressMode::FrameIndexBase;
3817     AM.Base.FrameIndex = getFixedSizedAllocaFI(AI);
3818     GEPOps.pop_back();
3819     return;
3820   }
3821
3822   if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps.back())) {
3823     AM.GV = GV;
3824     GEPOps.pop_back();
3825     return;
3826   }
3827
3828   AM.Base.Reg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
3829   GEPOps.pop_back();        // Consume the last GEP operand
3830 }
3831
3832
3833 /// isGEPFoldable - Return true if the specified GEP can be completely
3834 /// folded into the addressing mode of a load/store or lea instruction.
3835 bool X86ISel::isGEPFoldable(MachineBasicBlock *MBB,
3836                             Value *Src, User::op_iterator IdxBegin,
3837                             User::op_iterator IdxEnd, X86AddressMode &AM) {
3838
3839   std::vector<Value*> GEPOps;
3840   GEPOps.resize(IdxEnd-IdxBegin+1);
3841   GEPOps[0] = Src;
3842   std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
3843
3844   std::vector<const Type*>
3845     GEPTypes(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
3846              gep_type_end(Src->getType(), IdxBegin, IdxEnd));
3847
3848   MachineBasicBlock::iterator IP;
3849   if (MBB) IP = MBB->end();
3850   getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM);
3851
3852   // We can fold it away iff the getGEPIndex call eliminated all operands.
3853   return GEPOps.empty();
3854 }
3855
3856 void X86ISel::emitGEPOperation(MachineBasicBlock *MBB,
3857                                MachineBasicBlock::iterator IP,
3858                                Value *Src, User::op_iterator IdxBegin,
3859                                User::op_iterator IdxEnd, unsigned TargetReg) {
3860   const TargetData &TD = TM.getTargetData();
3861
3862   // If this is a getelementptr null, with all constant integer indices, just
3863   // replace it with TargetReg = 42.
3864   if (isa<ConstantPointerNull>(Src)) {
3865     User::op_iterator I = IdxBegin;
3866     for (; I != IdxEnd; ++I)
3867       if (!isa<ConstantInt>(*I))
3868         break;
3869     if (I == IdxEnd) {   // All constant indices
3870       unsigned Offset = TD.getIndexedOffset(Src->getType(),
3871                                          std::vector<Value*>(IdxBegin, IdxEnd));
3872       BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addImm(Offset);
3873       return;
3874     }
3875   }
3876
3877   std::vector<Value*> GEPOps;
3878   GEPOps.resize(IdxEnd-IdxBegin+1);
3879   GEPOps[0] = Src;
3880   std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
3881
3882   std::vector<const Type*> GEPTypes;
3883   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
3884                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
3885
3886   // Keep emitting instructions until we consume the entire GEP instruction.
3887   while (!GEPOps.empty()) {
3888     unsigned OldSize = GEPOps.size();
3889     X86AddressMode AM;
3890     getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM);
3891
3892     if (GEPOps.size() != OldSize) {
3893       // getGEPIndex consumed some of the input.  Build an LEA instruction here.
3894       unsigned NextTarget = 0;
3895       if (!GEPOps.empty()) {
3896         assert(AM.Base.Reg == 0 &&
3897            "getGEPIndex should have left the base register open for chaining!");
3898         NextTarget = AM.Base.Reg = makeAnotherReg(Type::UIntTy);
3899       }
3900
3901       if (AM.BaseType == X86AddressMode::RegBase &&
3902           AM.IndexReg == 0 && AM.Disp == 0 && !AM.GV)
3903         BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(AM.Base.Reg);
3904       else if (AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg == 0 &&
3905                AM.IndexReg == 0 && AM.Disp == 0)
3906         BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(AM.GV);
3907       else
3908         addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TargetReg), AM);
3909       --IP;
3910       TargetReg = NextTarget;
3911     } else if (GEPTypes.empty()) {
3912       // The getGEPIndex operation didn't want to build an LEA.  Check to see if
3913       // all operands are consumed but the base pointer.  If so, just load it
3914       // into the register.
3915       if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps[0])) {
3916         BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(GV);
3917       } else {
3918         unsigned BaseReg = getReg(GEPOps[0], MBB, IP);
3919         BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg);
3920       }
3921       break;                // we are now done
3922
3923     } else {
3924       // It's an array or pointer access: [ArraySize x ElementType].
3925       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
3926       Value *idx = GEPOps.back();
3927       GEPOps.pop_back();        // Consume a GEP operand
3928       GEPTypes.pop_back();
3929
3930       // Many GEP instructions use a [cast (int/uint) to LongTy] as their
3931       // operand on X86.  Handle this case directly now...
3932       if (CastInst *CI = dyn_cast<CastInst>(idx))
3933         if (CI->getOperand(0)->getType() == Type::IntTy ||
3934             CI->getOperand(0)->getType() == Type::UIntTy)
3935           idx = CI->getOperand(0);
3936
3937       // We want to add BaseReg to(idxReg * sizeof ElementType). First, we
3938       // must find the size of the pointed-to type (Not coincidentally, the next
3939       // type is the type of the elements in the array).
3940       const Type *ElTy = SqTy->getElementType();
3941       unsigned elementSize = TD.getTypeSize(ElTy);
3942
3943       // If idxReg is a constant, we don't need to perform the multiply!
3944       if (ConstantInt *CSI = dyn_cast<ConstantInt>(idx)) {
3945         if (!CSI->isNullValue()) {
3946           unsigned Offset = elementSize*CSI->getRawValue();
3947           unsigned Reg = makeAnotherReg(Type::UIntTy);
3948           BuildMI(*MBB, IP, X86::ADD32ri, 2, TargetReg)
3949                                 .addReg(Reg).addImm(Offset);
3950           --IP;            // Insert the next instruction before this one.
3951           TargetReg = Reg; // Codegen the rest of the GEP into this
3952         }
3953       } else if (elementSize == 1) {
3954         // If the element size is 1, we don't have to multiply, just add
3955         unsigned idxReg = getReg(idx, MBB, IP);
3956         unsigned Reg = makeAnotherReg(Type::UIntTy);
3957         BuildMI(*MBB, IP, X86::ADD32rr, 2,TargetReg).addReg(Reg).addReg(idxReg);
3958         --IP;            // Insert the next instruction before this one.
3959         TargetReg = Reg; // Codegen the rest of the GEP into this
3960       } else {
3961         unsigned idxReg = getReg(idx, MBB, IP);
3962         unsigned OffsetReg = makeAnotherReg(Type::UIntTy);
3963
3964         // Make sure we can back the iterator up to point to the first
3965         // instruction emitted.
3966         MachineBasicBlock::iterator BeforeIt = IP;
3967         if (IP == MBB->begin())
3968           BeforeIt = MBB->end();
3969         else
3970           --BeforeIt;
3971         doMultiplyConst(MBB, IP, OffsetReg, Type::IntTy, idxReg, elementSize);
3972
3973         // Emit an ADD to add OffsetReg to the basePtr.
3974         unsigned Reg = makeAnotherReg(Type::UIntTy);
3975         BuildMI(*MBB, IP, X86::ADD32rr, 2, TargetReg)
3976                           .addReg(Reg).addReg(OffsetReg);
3977
3978         // Step to the first instruction of the multiply.
3979         if (BeforeIt == MBB->end())
3980           IP = MBB->begin();
3981         else
3982           IP = ++BeforeIt;
3983
3984         TargetReg = Reg; // Codegen the rest of the GEP into this
3985       }
3986     }
3987   }
3988 }
3989
3990 /// visitAllocaInst - If this is a fixed size alloca, allocate space from the
3991 /// frame manager, otherwise do it the hard way.
3992 ///
3993 void X86ISel::visitAllocaInst(AllocaInst &I) {
3994   // If this is a fixed size alloca in the entry block for the function, we
3995   // statically stack allocate the space, so we don't need to do anything here.
3996   //
3997   if (dyn_castFixedAlloca(&I)) return;
3998
3999   // Find the data size of the alloca inst's getAllocatedType.
4000   const Type *Ty = I.getAllocatedType();
4001   unsigned TySize = TM.getTargetData().getTypeSize(Ty);
4002
4003   // Create a register to hold the temporary result of multiplying the type size
4004   // constant by the variable amount.
4005   unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy);
4006   unsigned SrcReg1 = getReg(I.getArraySize());
4007
4008   // TotalSizeReg = mul <numelements>, <TypeSize>
4009   MachineBasicBlock::iterator MBBI = BB->end();
4010   doMultiplyConst(BB, MBBI, TotalSizeReg, Type::UIntTy, SrcReg1, TySize);
4011
4012   // AddedSize = add <TotalSizeReg>, 15
4013   unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy);
4014   BuildMI(BB, X86::ADD32ri, 2, AddedSizeReg).addReg(TotalSizeReg).addImm(15);
4015
4016   // AlignedSize = and <AddedSize>, ~15
4017   unsigned AlignedSize = makeAnotherReg(Type::UIntTy);
4018   BuildMI(BB, X86::AND32ri, 2, AlignedSize).addReg(AddedSizeReg).addImm(~15);
4019
4020   // Subtract size from stack pointer, thereby allocating some space.
4021   BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
4022
4023   // Put a pointer to the space into the result register, by copying
4024   // the stack pointer.
4025   BuildMI(BB, X86::MOV32rr, 1, getReg(I)).addReg(X86::ESP);
4026
4027   // Inform the Frame Information that we have just allocated a variable-sized
4028   // object.
4029   F->getFrameInfo()->CreateVariableSizedObject();
4030 }
4031
4032 /// visitMallocInst - Malloc instructions are code generated into direct calls
4033 /// to the library malloc.
4034 ///
4035 void X86ISel::visitMallocInst(MallocInst &I) {
4036   unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType());
4037   unsigned Arg;
4038
4039   if (ConstantUInt *C = dyn_cast<ConstantUInt>(I.getOperand(0))) {
4040     Arg = getReg(ConstantUInt::get(Type::UIntTy, C->getValue() * AllocSize));
4041   } else {
4042     Arg = makeAnotherReg(Type::UIntTy);
4043     unsigned Op0Reg = getReg(I.getOperand(0));
4044     MachineBasicBlock::iterator MBBI = BB->end();
4045     doMultiplyConst(BB, MBBI, Arg, Type::UIntTy, Op0Reg, AllocSize);
4046   }
4047
4048   std::vector<ValueRecord> Args;
4049   Args.push_back(ValueRecord(Arg, Type::UIntTy));
4050   MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
4051                                   1).addExternalSymbol("malloc", true);
4052   doCall(ValueRecord(getReg(I), I.getType()), TheCall, Args);
4053 }
4054
4055
4056 /// visitFreeInst - Free instructions are code gen'd to call the free libc
4057 /// function.
4058 ///
4059 void X86ISel::visitFreeInst(FreeInst &I) {
4060   std::vector<ValueRecord> Args;
4061   Args.push_back(ValueRecord(I.getOperand(0)));
4062   MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
4063                                   1).addExternalSymbol("free", true);
4064   doCall(ValueRecord(0, Type::VoidTy), TheCall, Args);
4065 }
4066
4067 /// createX86SimpleInstructionSelector - This pass converts an LLVM function
4068 /// into a machine code representation is a very simple peep-hole fashion.  The
4069 /// generated code sucks but the implementation is nice and simple.
4070 ///
4071 FunctionPass *llvm::createX86SimpleInstructionSelector(TargetMachine &TM) {
4072   return new X86ISel(TM);
4073 }