X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FInstSelectSimple.cpp;h=508bbc8958938232443fc373c34151844be9e2b1;hb=235aa5eba7984ba7b526f9b63273549f5a7efc80;hp=7a1f54e6061bfae23048e3f9114675b520482980;hpb=6d804f408a40ec953e690919b664513bd68001fc;p=oota-llvm.git diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index 7a1f54e6061..508bbc89589 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -35,6 +35,43 @@ using namespace llvm; namespace { Statistic<> NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added"); + + /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86 + /// Representation. + /// + enum TypeClass { + cByte, cShort, cInt, cFP, cLong + }; +} + +/// getClass - Turn a primitive type into a "class" number which is based on the +/// size of the type, and whether or not it is floating point. +/// +static inline TypeClass getClass(const Type *Ty) { + switch (Ty->getPrimitiveID()) { + case Type::SByteTyID: + case Type::UByteTyID: return cByte; // Byte operands are class #0 + case Type::ShortTyID: + case Type::UShortTyID: return cShort; // Short operands are class #1 + case Type::IntTyID: + case Type::UIntTyID: + case Type::PointerTyID: return cInt; // Int's and pointers are class #2 + + case Type::FloatTyID: + case Type::DoubleTyID: return cFP; // Floating Point is #3 + + case Type::LongTyID: + case Type::ULongTyID: return cLong; // Longs are class #4 + default: + assert(0 && "Invalid type to getClass!"); + return cByte; // not reached + } +} + +// getClassB - Just like getClass, but treat boolean values as bytes. 
+static inline TypeClass getClassB(const Type *Ty) { + if (Ty == Type::BoolTy) return cByte; + return getClass(Ty); } namespace { @@ -151,13 +188,6 @@ namespace { void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass); void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); } void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); } - void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, - unsigned DestReg, const Type *DestTy, - unsigned Op0Reg, unsigned Op1Reg); - void doMultiplyConst(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, const Type *DestTy, - unsigned Op0Reg, unsigned Op1Val); void visitMul(BinaryOperator &B); void visitDiv(BinaryOperator &B) { visitDivRem(B); } @@ -242,10 +272,28 @@ namespace { Value *Op0, Value *Op1, unsigned OperatorClass, unsigned TargetReg); + /// emitBinaryFPOperation - This method handles emission of floating point + /// Add (0), Sub (1), Mul (2), and Div (3) operations. + void emitBinaryFPOperation(MachineBasicBlock *BB, + MachineBasicBlock::iterator IP, + Value *Op0, Value *Op1, + unsigned OperatorClass, unsigned TargetReg); + + void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP, + Value *Op0, Value *Op1, unsigned TargetReg); + + void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, + unsigned DestReg, const Type *DestTy, + unsigned Op0Reg, unsigned Op1Reg); + void doMultiplyConst(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, const Type *DestTy, + unsigned Op0Reg, unsigned Op1Val); + void emitDivRemOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP, - unsigned Op0Reg, unsigned Op1Reg, bool isDiv, - const Type *Ty, unsigned TargetReg); + Value *Op0, Value *Op1, bool isDiv, + unsigned TargetReg); /// emitSetCCOperation - Common code shared between visitSetCondInst and /// constant expression support. 
@@ -315,22 +363,28 @@ namespace { } unsigned getReg(Value *V, MachineBasicBlock *MBB, MachineBasicBlock::iterator IPt) { - unsigned &Reg = RegMap[V]; - if (Reg == 0) { - Reg = makeAnotherReg(V->getType()); - RegMap[V] = Reg; - } - // If this operand is a constant, emit the code to copy the constant into // the register here... // if (Constant *C = dyn_cast(V)) { + unsigned Reg = makeAnotherReg(V->getType()); copyConstantToRegister(MBB, IPt, C, Reg); - RegMap.erase(V); // Assign a new name to this constant if ref'd again + return Reg; } else if (GlobalValue *GV = dyn_cast(V)) { + unsigned Reg = makeAnotherReg(V->getType()); // Move the address of the global into the register BuildMI(*MBB, IPt, X86::MOV32ri, 1, Reg).addGlobalAddress(GV); - RegMap.erase(V); // Assign a new name to this address if ref'd again + return Reg; + } else if (CastInst *CI = dyn_cast(V)) { + // Do not emit noop casts at all. + if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType())) + return getReg(CI->getOperand(0), MBB, IPt); + } + + unsigned &Reg = RegMap[V]; + if (Reg == 0) { + Reg = makeAnotherReg(V->getType()); + RegMap[V] = Reg; } return Reg; @@ -338,44 +392,6 @@ namespace { }; } -/// TypeClass - Used by the X86 backend to group LLVM types by their basic X86 -/// Representation. -/// -enum TypeClass { - cByte, cShort, cInt, cFP, cLong -}; - -/// getClass - Turn a primitive type into a "class" number which is based on the -/// size of the type, and whether or not it is floating point. 
-/// -static inline TypeClass getClass(const Type *Ty) { - switch (Ty->getPrimitiveID()) { - case Type::SByteTyID: - case Type::UByteTyID: return cByte; // Byte operands are class #0 - case Type::ShortTyID: - case Type::UShortTyID: return cShort; // Short operands are class #1 - case Type::IntTyID: - case Type::UIntTyID: - case Type::PointerTyID: return cInt; // Int's and pointers are class #2 - - case Type::FloatTyID: - case Type::DoubleTyID: return cFP; // Floating Point is #3 - - case Type::LongTyID: - case Type::ULongTyID: return cLong; // Longs are class #4 - default: - assert(0 && "Invalid type to getClass!"); - return cByte; // not reached - } -} - -// getClassB - Just like getClass, but treat boolean values as bytes. -static inline TypeClass getClassB(const Type *Ty) { - if (Ty == Type::BoolTy) return cByte; - return getClass(Ty); -} - - /// copyConstantToRegister - Output the instructions required to put the /// specified constant into the specified register. /// @@ -402,21 +418,15 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, Class, R); return; - case Instruction::Mul: { - unsigned Op0Reg = getReg(CE->getOperand(0), MBB, IP); - unsigned Op1Reg = getReg(CE->getOperand(1), MBB, IP); - doMultiply(MBB, IP, R, CE->getType(), Op0Reg, Op1Reg); + case Instruction::Mul: + emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R); return; - } + case Instruction::Div: - case Instruction::Rem: { - unsigned Op0Reg = getReg(CE->getOperand(0), MBB, IP); - unsigned Op1Reg = getReg(CE->getOperand(1), MBB, IP); - emitDivRemOperation(MBB, IP, Op0Reg, Op1Reg, - CE->getOpcode() == Instruction::Div, - CE->getType(), R); + case Instruction::Rem: + emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), + CE->getOpcode() == Instruction::Div, R); return; - } case Instruction::SetNE: case Instruction::SetEQ: @@ -511,39 +521,51 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { MachineFrameInfo *MFI = F->getFrameInfo(); for (Function::aiterator 
I = Fn.abegin(), E = Fn.aend(); I != E; ++I) { - unsigned Reg = getReg(*I); - + bool ArgLive = !I->use_empty(); + unsigned Reg = ArgLive ? getReg(*I) : 0; int FI; // Frame object index + switch (getClassB(I->getType())) { case cByte: - FI = MFI->CreateFixedObject(1, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI); + if (ArgLive) { + FI = MFI->CreateFixedObject(1, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI); + } break; case cShort: - FI = MFI->CreateFixedObject(2, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI); + if (ArgLive) { + FI = MFI->CreateFixedObject(2, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI); + } break; case cInt: - FI = MFI->CreateFixedObject(4, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); + if (ArgLive) { + FI = MFI->CreateFixedObject(4, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); + } break; case cLong: - FI = MFI->CreateFixedObject(8, ArgOffset); - addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); - addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4); + if (ArgLive) { + FI = MFI->CreateFixedObject(8, ArgOffset); + addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); + addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4); + } ArgOffset += 4; // longs require 4 additional bytes break; case cFP: - unsigned Opcode; - if (I->getType() == Type::FloatTy) { - Opcode = X86::FLD32m; - FI = MFI->CreateFixedObject(4, ArgOffset); - } else { - Opcode = X86::FLD64m; - FI = MFI->CreateFixedObject(8, ArgOffset); - ArgOffset += 4; // doubles require 4 additional bytes + if (ArgLive) { + unsigned Opcode; + if (I->getType() == Type::FloatTy) { + Opcode = X86::FLD32m; + FI = MFI->CreateFixedObject(4, ArgOffset); + } else { + Opcode = X86::FLD64m; + FI = MFI->CreateFixedObject(8, ArgOffset); + } + addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI); } - addFrameReference(BuildMI(BB, Opcode, 
4, Reg), FI); + if (I->getType() == Type::DoubleTy) + ArgOffset += 4; // doubles require 4 additional bytes break; default: assert(0 && "Unhandled argument type!"); @@ -721,9 +743,9 @@ void ISel::InsertFPRegKills() { // If we haven't found an FP register use or def in this basic block, check // to see if any of our successors has an FP PHI node, which will cause a // copy to be inserted into this block. - for (succ_const_iterator SI = succ_begin(BB->getBasicBlock()), - E = succ_end(BB->getBasicBlock()); SI != E; ++SI) { - MachineBasicBlock *SBB = MBBMap[*SI]; + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SBB = *SI; for (MachineBasicBlock::iterator I = SBB->begin(); I != SBB->end() && I->getOpcode() == X86::PHI; ++I) { if (RegMap.getRegClass(I->getOperand(0).getReg())->getSize() == 10) @@ -907,9 +929,13 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r); break; case cFP: - BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r); - BuildMI(*MBB, IP, X86::FNSTSW8r, 0); - BuildMI(*MBB, IP, X86::SAHF, 1); + if (0) { // for processors prior to the P6 + BuildMI(*MBB, IP, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r); + BuildMI(*MBB, IP, X86::FNSTSW8r, 0); + BuildMI(*MBB, IP, X86::SAHF, 1); + } else { + BuildMI(*MBB, IP, X86::FpUCOMI, 2).addReg(Op0r).addReg(Op1r); + } break; case cLong: @@ -1012,7 +1038,18 @@ void ISel::emitSelectOperation(MachineBasicBlock *MBB, FalseVal = ConstantExpr::getCast(F, Type::ShortTy); } - + unsigned TrueReg = getReg(TrueVal, MBB, IP); + unsigned FalseReg = getReg(FalseVal, MBB, IP); + if (TrueReg == FalseReg) { + static const unsigned Opcode[] = { + X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr + }; + BuildMI(*MBB, IP, Opcode[SelectClass], 1, DestReg).addReg(TrueReg); + if (SelectClass == cLong) + BuildMI(*MBB, IP, X86::MOV32rr, 1, 
DestReg+1).addReg(TrueReg+1); + return; + } + unsigned Opcode; if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) { // We successfully folded the setcc into the select instruction. @@ -1104,8 +1141,6 @@ void ISel::emitSelectOperation(MachineBasicBlock *MBB, } } - unsigned TrueReg = getReg(TrueVal, MBB, IP); - unsigned FalseReg = getReg(FalseVal, MBB, IP); unsigned RealDestReg = DestReg; @@ -1260,6 +1295,11 @@ static inline BasicBlock *getBlockAfter(BasicBlock *BB) { /// just make a fall-through (but we don't currently). /// void ISel::visitBranchInst(BranchInst &BI) { + // Update machine-CFG edges + BB->addSuccessor (MBBMap[BI.getSuccessor(0)]); + if (BI.isConditional()) + BB->addSuccessor (MBBMap[BI.getSuccessor(1)]); + BasicBlock *NextBB = getBlockAfter(BI.getParent()); // BB after current one if (!BI.isConditional()) { // Unconditional branch? @@ -1506,6 +1546,25 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) { case Intrinsic::writeport: // We directly implement these intrinsics break; + case Intrinsic::readio: { + // On X86, memory operations are in-order. Lower this intrinsic + // into a volatile load. + Instruction *Before = CI->getPrev(); + LoadInst * LI = new LoadInst (CI->getOperand(1), "", true, CI); + CI->replaceAllUsesWith (LI); + BB->getInstList().erase (CI); + break; + } + case Intrinsic::writeio: { + // On X86, memory operations are in-order. Lower this intrinsic + // into a volatile store. + Instruction *Before = CI->getPrev(); + StoreInst * LI = new StoreInst (CI->getOperand(1), + CI->getOperand(2), true, CI); + CI->replaceAllUsesWith (LI); + BB->getInstList().erase (CI); + break; + } default: // All other intrinsic calls we must lower. 
Instruction *Before = CI->getPrev(); @@ -1665,72 +1724,105 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) { return; } - case Intrinsic::readport: + case Intrinsic::readport: { + // First, determine that the size of the operand falls within the acceptable + // range for this architecture. // - // First, determine that the size of the operand falls within the - // acceptable range for this architecture. - // - if ((CI.getOperand(1)->getType()->getPrimitiveSize()) != 2) { + if (getClassB(CI.getOperand(1)->getType()) != cShort) { std::cerr << "llvm.readport: Address size is not 16 bits\n"; - exit (1); + exit(1); } - // // Now, move the I/O port address into the DX register and use the IN // instruction to get the input data. // - BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(getReg(CI.getOperand(1))); - switch (CI.getCalledFunction()->getReturnType()->getPrimitiveSize()) { - case 1: - BuildMI(BB, X86::IN8, 0); - break; - case 2: - BuildMI(BB, X86::IN16, 0); - break; - case 4: - BuildMI(BB, X86::IN32, 0); - break; - default: - std::cerr << "Cannot do input on this data type"; - exit (1); + unsigned Class = getClass(CI.getCalledFunction()->getReturnType()); + unsigned DestReg = getReg(CI); + + // If the port is a single-byte constant, use the immediate form. 
+ if (ConstantInt *C = dyn_cast(CI.getOperand(1))) + if ((C->getRawValue() & 255) == C->getRawValue()) { + switch (Class) { + case cByte: + BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue()); + BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL); + return; + case cShort: + BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue()); + BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX); + return; + case cInt: + BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue()); + BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX); + return; + } + } + + unsigned Reg = getReg(CI.getOperand(1)); + BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg); + switch (Class) { + case cByte: + BuildMI(BB, X86::IN8rr, 0); + BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL); + break; + case cShort: + BuildMI(BB, X86::IN16rr, 0); + BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX); + break; + case cInt: + BuildMI(BB, X86::IN32rr, 0); + BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX); + break; + default: + std::cerr << "Cannot do input on this data type"; + exit (1); } return; + } - case Intrinsic::writeport: - // + case Intrinsic::writeport: { // First, determine that the size of the operand falls within the // acceptable range for this architecture. - // - // - if ((CI.getOperand(2)->getType()->getPrimitiveSize()) != 2) { + if (getClass(CI.getOperand(2)->getType()) != cShort) { std::cerr << "llvm.writeport: Address size is not 16 bits\n"; - exit (1); + exit(1); } - // - // Now, move the I/O port address into the DX register and the value to - // write into the AL/AX/EAX register. 
- // - BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(getReg(CI.getOperand(2))); - switch (CI.getOperand(1)->getType()->getPrimitiveSize()) { - case 1: - BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(getReg(CI.getOperand(1))); - BuildMI(BB, X86::OUT8, 0); - break; - case 2: - BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(getReg(CI.getOperand(1))); - BuildMI(BB, X86::OUT16, 0); - break; - case 4: - BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(getReg(CI.getOperand(1))); - BuildMI(BB, X86::OUT32, 0); - break; - default: - std::cerr << "Cannot do output on this data type"; - exit (1); + unsigned Class = getClassB(CI.getOperand(1)->getType()); + unsigned ValReg = getReg(CI.getOperand(1)); + switch (Class) { + case cByte: + BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg); + break; + case cShort: + BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg); + break; + case cInt: + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg); + break; + default: + std::cerr << "llvm.writeport: invalid data type for X86 target"; + exit(1); } - return; + + // If the port is a single-byte constant, use the immediate form. + if (ConstantInt *C = dyn_cast(CI.getOperand(2))) + if ((C->getRawValue() & 255) == C->getRawValue()) { + static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir }; + BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue()); + return; + } + + // Otherwise, move the I/O port address into the DX register and the value + // to write into the AL/AX/EAX register. 
+ static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr }; + unsigned Reg = getReg(CI.getOperand(2)); + BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg); + BuildMI(BB, Opc[Class], 0); + return; + } + default: assert(0 && "Error: unknown intrinsics should have been lowered!"); } } @@ -1748,12 +1840,15 @@ static bool isSafeToFoldLoadIntoInstruction(LoadInst &LI, Instruction &User) { case Instruction::Call: case Instruction::Invoke: return false; + case Instruction::Load: + if (cast(It)->isVolatile() && LI.isVolatile()) + return false; + break; } } return true; } - /// visitSimpleBinary - Implement simple binary operators for integral types... /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for /// Xor. @@ -1769,22 +1864,31 @@ void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { std::swap(Op0, Op1); // Make sure any loads are in the RHS. unsigned Class = getClassB(B.getType()); - if (isa(Op1) && Class < cFP && + if (isa(Op1) && Class != cLong && isSafeToFoldLoadIntoInstruction(*cast(Op1), B)) { - static const unsigned OpcodeTab[][3] = { - // Arithmetic operators - { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD - { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB - - // Bitwise operators - { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND - { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR - { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR - }; - - assert(Class < cFP && "General code handles 64-bit integer types!"); - unsigned Opcode = OpcodeTab[OperatorClass][Class]; + unsigned Opcode; + if (Class != cFP) { + static const unsigned OpcodeTab[][3] = { + // Arithmetic operators + { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD + { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB + + // Bitwise operators + { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND + { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR + { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR + }; + Opcode = 
OpcodeTab[OperatorClass][Class]; + } else { + static const unsigned OpcodeTab[][2] = { + { X86::FADD32m, X86::FADD64m }, // ADD + { X86::FSUB32m, X86::FSUB64m }, // SUB + }; + const Type *Ty = Op0->getType(); + assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); + Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy]; + } unsigned BaseReg, Scale, IndexReg, Disp; getAddressingMode(cast(Op1)->getOperand(0), BaseReg, @@ -1796,9 +1900,96 @@ void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { return; } + // If this is a floating point subtract, check to see if we can fold the first + // operand in. + if (Class == cFP && OperatorClass == 1 && + isa(Op0) && + isSafeToFoldLoadIntoInstruction(*cast(Op0), B)) { + const Type *Ty = Op0->getType(); + assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); + unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m; + + unsigned BaseReg, Scale, IndexReg, Disp; + getAddressingMode(cast(Op0)->getOperand(0), BaseReg, + Scale, IndexReg, Disp); + + unsigned Op1r = getReg(Op1); + addFullAddress(BuildMI(BB, Opcode, 2, DestReg).addReg(Op1r), + BaseReg, Scale, IndexReg, Disp); + return; + } + emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg); } + +/// emitBinaryFPOperation - This method handles emission of floating point +/// Add (0), Sub (1), Mul (2), and Div (3) operations. +void ISel::emitBinaryFPOperation(MachineBasicBlock *BB, + MachineBasicBlock::iterator IP, + Value *Op0, Value *Op1, + unsigned OperatorClass, unsigned DestReg) { + + // Special case: op Reg, + if (ConstantFP *Op1C = dyn_cast(Op1)) + if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) { + // Create a constant pool entry for this constant. 
+ MachineConstantPool *CP = F->getConstantPool(); + unsigned CPI = CP->getConstantPoolIndex(Op1C); + const Type *Ty = Op1->getType(); + + static const unsigned OpcodeTab[][4] = { + { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m }, // Float + { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m }, // Double + }; + + assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); + unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass]; + unsigned Op0r = getReg(Op0, BB, IP); + addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5, + DestReg).addReg(Op0r), CPI); + return; + } + + // Special case: R1 = op , R2 + if (ConstantFP *CFP = dyn_cast(Op0)) + if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) { + // -0.0 - X === -X + unsigned op1Reg = getReg(Op1, BB, IP); + BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg); + return; + } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) { + // R1 = op CST, R2 --> R1 = opr R2, CST + + // Create a constant pool entry for this constant. + MachineConstantPool *CP = F->getConstantPool(); + unsigned CPI = CP->getConstantPoolIndex(CFP); + const Type *Ty = CFP->getType(); + + static const unsigned OpcodeTab[][4] = { + { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float + { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double + }; + + assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!"); + unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass]; + unsigned Op1r = getReg(Op1, BB, IP); + addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5, + DestReg).addReg(Op1r), CPI); + return; + } + + // General case. 
+ static const unsigned OpcodeTab[4] = { + X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV + }; + + unsigned Opcode = OpcodeTab[OperatorClass]; + unsigned Op0r = getReg(Op0, BB, IP); + unsigned Op1r = getReg(Op1, BB, IP); + BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); +} + /// emitSimpleBinaryOperation - Implement simple binary operators for integral /// types... OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for /// Or, 4 for Xor. @@ -1812,37 +2003,34 @@ void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, unsigned OperatorClass, unsigned DestReg) { unsigned Class = getClassB(Op0->getType()); + if (Class == cFP) { + assert(OperatorClass < 2 && "No logical ops for FP!"); + emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg); + return; + } + // sub 0, X -> neg X - if (OperatorClass == 1) - if (ConstantInt *CI = dyn_cast(Op0)) { - if (CI->isNullValue()) { - unsigned op1Reg = getReg(Op1, MBB, IP); - static unsigned const NEGTab[] = { - X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r - }; - BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg); - - if (Class == cLong) { - // We just emitted: Dl = neg Sl - // Now emit : T = addc Sh, 0 - // : Dh = neg T - unsigned T = makeAnotherReg(Type::IntTy); - BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0); - BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T); - } - return; - } - } else if (ConstantFP *CFP = dyn_cast(Op0)) - if (CFP->isExactlyValue(-0.0)) { - // -0.0 - X === -X - unsigned op1Reg = getReg(Op1, MBB, IP); - BuildMI(*MBB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg); - return; + if (ConstantInt *CI = dyn_cast(Op0)) + if (OperatorClass == 1 && CI->isNullValue()) { + unsigned op1Reg = getReg(Op1, MBB, IP); + static unsigned const NEGTab[] = { + X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r + }; + BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg); + + if (Class == cLong) { + // We just emitted: Dl = neg Sl + // Now emit : T = 
addc Sh, 0 + // : Dh = neg T + unsigned T = makeAnotherReg(Type::IntTy); + BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0); + BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T); } + return; + } - // Special case: op Reg, - if (isa(Op1)) { - ConstantInt *Op1C = cast(Op1); + // Special case: op Reg, + if (ConstantInt *Op1C = dyn_cast(Op1)) { unsigned Op0r = getReg(Op0, MBB, IP); // xor X, -1 -> not X @@ -1891,56 +2079,56 @@ void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, if (Class != cLong) { BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); return; - } else { - // If this is a long value and the high or low bits have a special - // property, emit some special cases. - unsigned Op1h = cast(Op1C)->getRawValue() >> 32LL; - - // If the constant is zero in the low 32-bits, just copy the low part - // across and apply the normal 32-bit operation to the high parts. There - // will be no carry or borrow into the top. - if (Op1l == 0) { - if (OperatorClass != 2) // All but and... - BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r); - else - BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); - BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1) - .addReg(Op0r+1).addImm(Op1h); - return; - } - - // If this is a logical operation and the top 32-bits are zero, just - // operate on the lower 32. - if (Op1h == 0 && OperatorClass > 1) { - BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg) - .addReg(Op0r).addImm(Op1l); - if (OperatorClass != 2) // All but and - BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1); - else - BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); - return; - } - - // TODO: We could handle lots of other special cases here, such as AND'ing - // with 0xFFFFFFFF00000000 -> noop, etc. - - // Otherwise, code generate the full operation with a constant. 
- static const unsigned TopTab[] = { - X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri - }; - - BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); - BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1) - .addReg(Op0r+1).addImm(Op1h); + } + + // If this is a long value and the high or low bits have a special + // property, emit some special cases. + unsigned Op1h = cast(Op1C)->getRawValue() >> 32LL; + + // If the constant is zero in the low 32-bits, just copy the low part + // across and apply the normal 32-bit operation to the high parts. There + // will be no carry or borrow into the top. + if (Op1l == 0) { + if (OperatorClass != 2) // All but and... + BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r); + else + BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); + BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1) + .addReg(Op0r+1).addImm(Op1h); return; } + + // If this is a logical operation and the top 32-bits are zero, just + // operate on the lower 32. + if (Op1h == 0 && OperatorClass > 1) { + BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg) + .addReg(Op0r).addImm(Op1l); + if (OperatorClass != 2) // All but and + BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1); + else + BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); + return; + } + + // TODO: We could handle lots of other special cases here, such as AND'ing + // with 0xFFFFFFFF00000000 -> noop, etc. + + // Otherwise, code generate the full operation with a constant. + static const unsigned TopTab[] = { + X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri + }; + + BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); + BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1) + .addReg(Op0r+1).addImm(Op1h); + return; } // Finally, handle the general case now. 
static const unsigned OpcodeTab[][5] = { // Arithmetic operators - { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, X86::FpADD, X86::ADD32rr },// ADD - { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, X86::FpSUB, X86::SUB32rr },// SUB + { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr }, // ADD + { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr }, // SUB // Bitwise operators { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr }, // AND @@ -1949,7 +2137,6 @@ void ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, }; unsigned Opcode = OpcodeTab[OperatorClass][Class]; - assert(Opcode && "Floating point arguments to logical inst?"); unsigned Op0r = getReg(Op0, MBB, IP); unsigned Op1r = getReg(Op1, MBB, IP); BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); @@ -1972,9 +2159,6 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, unsigned op0Reg, unsigned op1Reg) { unsigned Class = getClass(DestTy); switch (Class) { - case cFP: // Floating point multiply - BuildMI(*MBB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg); - return; case cInt: case cShort: BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg) @@ -2004,6 +2188,9 @@ static unsigned ExactLog2(unsigned Val) { return Count+1; } + +/// doMultiplyConst - This function is specialized to efficiently codegen an 8, +/// 16, or 32-bit integer multiply by a constant. void ISel::doMultiplyConst(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, unsigned DestReg, const Type *DestTy, @@ -2058,97 +2245,141 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB, /// with the EAX register explicitly. /// void ISel::visitMul(BinaryOperator &I) { - unsigned Op0Reg = getReg(I.getOperand(0)); - unsigned DestReg = getReg(I); + unsigned ResultReg = getReg(I); + + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + + // Fold loads into floating point multiplies. 
+  if (getClass(Op0->getType()) == cFP) {
+    if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
+      if (!I.swapOperands())
+        std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
+      if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+        const Type *Ty = Op0->getType();
+        assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
+        unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
+
+        unsigned BaseReg, Scale, IndexReg, Disp;
+        getAddressingMode(LI->getOperand(0), BaseReg,
+                          Scale, IndexReg, Disp);
+
+        unsigned Op0r = getReg(Op0);
+        addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
+                       BaseReg, Scale, IndexReg, Disp);
+        return;
+      }
+  }
+
+  MachineBasicBlock::iterator IP = BB->end();
+  emitMultiply(BB, IP, Op0, Op1, ResultReg);
+}
+
+void ISel::emitMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                        Value *Op0, Value *Op1, unsigned DestReg) {
+  MachineBasicBlock &BB = *MBB;
+  TypeClass Class = getClass(Op0->getType());

   // Simple scalar multiply?
-  if (getClass(I.getType()) != cLong) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
-      unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant
-      MachineBasicBlock::iterator MBBI = BB->end();
-      doMultiplyConst(BB, MBBI, DestReg, I.getType(), Op0Reg, Val);
+  unsigned Op0Reg = getReg(Op0, &BB, IP);
+  switch (Class) {
+  case cByte:
+  case cShort:
+  case cInt:
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+      unsigned Val = (unsigned)CI->getRawValue(); // Isn't a 64-bit constant
+      doMultiplyConst(&BB, IP, DestReg, Op0->getType(), Op0Reg, Val);
     } else {
-      unsigned Op1Reg = getReg(I.getOperand(1));
-      MachineBasicBlock::iterator MBBI = BB->end();
-      doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
+      unsigned Op1Reg = getReg(Op1, &BB, IP);
+      doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg);
     }
-  } else {
-    // Long value.  We have to do things the hard way...
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
-      unsigned CLow = CI->getRawValue();
-      unsigned CHi  = CI->getRawValue() >> 32;
-
-      if (CLow == 0) {
-        // If the low part of the constant is all zeros, things are simple.
-        BuildMI(BB, X86::MOV32ri, 1, DestReg).addImm(0);
-        doMultiplyConst(BB, BB->end(), DestReg+1, Type::UIntTy, Op0Reg, CHi);
-        return;
-      }
+    return;
+  case cFP:
+    emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg);
+    return;
+  case cLong:
+    break;
+  }

-      // Multiply the two low parts... capturing carry into EDX
-      unsigned OverflowReg = 0;
-      if (CLow == 1) {
-        BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
-      } else {
-        unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
-        OverflowReg = makeAnotherReg(Type::UIntTy);
-        BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
-        BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
-        BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL);  // AL*BL
-
-        BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);   // AL*BL
-        BuildMI(BB, X86::MOV32rr, 1,OverflowReg).addReg(X86::EDX);// AL*BL >> 32
-      }
-
-      unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
-      doMultiplyConst(BB, BB->end(), AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
-
-      unsigned AHBLplusOverflowReg;
-      if (OverflowReg) {
-        AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
-        BuildMI(BB, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
-                AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
-      } else {
-        AHBLplusOverflowReg = AHBLReg;
-      }
-
-      if (CHi == 0) {
-        BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
-      } else {
-        unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
-        doMultiplyConst(BB, BB->end(), ALBHReg, Type::UIntTy, Op0Reg, CHi);
-
-        BuildMI(BB, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
-                DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
-      }
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+    unsigned CLow = CI->getRawValue();
+    unsigned CHi  = CI->getRawValue() >> 32;
+
+    if (CLow == 0) {
+      // If the low part of the constant is all zeros, things are simple.
+      BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
+      doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi);
+      return;
+    }
+
+    // Multiply the two low parts... capturing carry into EDX
+    unsigned OverflowReg = 0;
+    if (CLow == 1) {
+      BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
     } else {
-    unsigned Op1Reg = getReg(I.getOperand(1));
-    // Multiply the two low parts... capturing carry into EDX
-    BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
-    BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg);  // AL*BL
-
-    unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
-    BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);     // AL*BL
-    BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
-
-    MachineBasicBlock::iterator MBBI = BB->end();
-    unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
-    BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
-            AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+      unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
+      OverflowReg = makeAnotherReg(Type::UIntTy);
+      BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
+      BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
+      BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL);  // AL*BL

-    unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
-    BuildMI(*BB, MBBI, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
+      BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);  // AL*BL
+      BuildMI(BB, IP, X86::MOV32rr, 1,
+              OverflowReg).addReg(X86::EDX);                       // AL*BL >> 32
+    }
+
+    unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
+    doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
+
+    unsigned AHBLplusOverflowReg;
+    if (OverflowReg) {
+      AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+      BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); - - MBBI = BB->end(); + } else { + AHBLplusOverflowReg = AHBLReg; + } + + if (CHi == 0) { + BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg); + } else { unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH - BuildMI(*BB, MBBI, X86::IMUL32rr, 2, - ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); + doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi); - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); } + return; } + + // General 64x64 multiply + + unsigned Op1Reg = getReg(Op1, &BB, IP); + // Multiply the two low parts... capturing carry into EDX + BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, IP, X86::MOV32rr, 1, + OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BuildMI(BB, IP, X86::IMUL32rr, 2, + AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, IP, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BuildMI(BB, IP, X86::IMUL32rr, 2, + ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); + + BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); } @@ -2158,25 +2389,64 @@ void ISel::visitMul(BinaryOperator &I) { /// instructions work differently for signed and unsigned operands. 
///
 void ISel::visitDivRem(BinaryOperator &I) {
-  unsigned Op0Reg  = getReg(I.getOperand(0));
-  unsigned Op1Reg  = getReg(I.getOperand(1));
   unsigned ResultReg = getReg(I);
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Fold loads into floating point divides.
+  if (getClass(Op0->getType()) == cFP) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
+      if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+        const Type *Ty = Op0->getType();
+        assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
+        unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
+
+        unsigned BaseReg, Scale, IndexReg, Disp;
+        getAddressingMode(LI->getOperand(0), BaseReg,
+                          Scale, IndexReg, Disp);
+
+        unsigned Op0r = getReg(Op0);
+        addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op0r),
+                       BaseReg, Scale, IndexReg, Disp);
+        return;
+      }
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(Op0))
+      if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
+        const Type *Ty = Op0->getType();
+        assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
+        unsigned Opcode = Ty == Type::FloatTy ?
X86::FDIVR32m : X86::FDIVR64m;
+
+        unsigned BaseReg, Scale, IndexReg, Disp;
+        getAddressingMode(LI->getOperand(0), BaseReg,
+                          Scale, IndexReg, Disp);
+
+        unsigned Op1r = getReg(Op1);
+        addFullAddress(BuildMI(BB, Opcode, 2, ResultReg).addReg(Op1r),
+                       BaseReg, Scale, IndexReg, Disp);
+        return;
+      }
+  }
+
   MachineBasicBlock::iterator IP = BB->end();
-  emitDivRemOperation(BB, IP, Op0Reg, Op1Reg, I.getOpcode() == Instruction::Div,
-                      I.getType(), ResultReg);
+  emitDivRemOperation(BB, IP, Op0, Op1,
+                      I.getOpcode() == Instruction::Div, ResultReg);
 }

 void ISel::emitDivRemOperation(MachineBasicBlock *BB,
                                MachineBasicBlock::iterator IP,
-                               unsigned Op0Reg, unsigned Op1Reg, bool isDiv,
-                               const Type *Ty, unsigned ResultReg) {
+                               Value *Op0, Value *Op1, bool isDiv,
+                               unsigned ResultReg) {
+  const Type *Ty = Op0->getType();
   unsigned Class = getClass(Ty);
   switch (Class) {
   case cFP:              // Floating point divide
     if (isDiv) {
-      BuildMI(*BB, IP, X86::FpDIV, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg);
+      emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg);
+      return;
     } else {               // Floating point remainder...
+      unsigned Op0Reg = getReg(Op0, BB, IP);
+      unsigned Op1Reg = getReg(Op1, BB, IP);
       MachineInstr *TheCall =
         BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
       std::vector<ValueRecord> Args;
@@ -2188,7 +2458,8 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
   case cLong: {
     static const char *FnName[] =
       { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
-
+    unsigned Op0Reg = getReg(Op0, BB, IP);
+    unsigned Op1Reg = getReg(Op1, BB, IP);
     unsigned NameIdx = Ty->isUnsigned()*2 + isDiv;
     MachineInstr *TheCall =
       BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
@@ -2220,11 +2491,13 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
   unsigned ExtReg = ExtRegs[Class];

   // Put the first operand into one of the A registers...
+  unsigned Op0Reg = getReg(Op0, BB, IP);
+  unsigned Op1Reg = getReg(Op1, BB, IP);
   BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);

   if (isSigned) {
     // Emit a sign extension instruction...
-    unsigned ShiftResult = makeAnotherReg(Ty);
+    unsigned ShiftResult = makeAnotherReg(Op0->getType());
     BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
     BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
   } else {
@@ -2421,16 +2694,42 @@ void ISel::visitLoadInst(LoadInst &I) {
   // Check to see if this load instruction is going to be folded into a binary
   // instruction, like add.  If so, we don't want to emit it.  Wouldn't a real
   // pattern matching instruction selector be nice?
-  if (I.hasOneUse() && getClassB(I.getType()) < cFP) {
+  unsigned Class = getClassB(I.getType());
+  if (I.hasOneUse()) {
     Instruction *User = cast<Instruction>(I.use_back());
     switch (User->getOpcode()) {
-    default: User = 0; break;
+    case Instruction::Cast:
+      // If this is a cast from a signed-integer type to a floating point type,
+      // fold the cast here.
+      if (getClass(User->getType()) == cFP &&
+          (I.getType() == Type::ShortTy || I.getType() == Type::IntTy ||
+           I.getType() == Type::LongTy)) {
+        unsigned DestReg = getReg(User);
+        static const unsigned Opcode[] = {
+          0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m
+        };
+        unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+        getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);
+        addFullAddress(BuildMI(BB, Opcode[Class], 5, DestReg),
+                       BaseReg, Scale, IndexReg, Disp);
+        return;
+      } else {
+        User = 0;
+      }
+      break;
+
+    case Instruction::Add:
     case Instruction::Sub:
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor:
+      if (Class == cLong) User = 0;
       break;
+    case Instruction::Mul:
+    case Instruction::Div:
+      if (Class != cFP) User = 0;
+      break;  // Folding only implemented for floating point.
+    default: User = 0; break;
     }

     if (User) {
@@ -2446,6 +2745,15 @@ void ISel::visitLoadInst(LoadInst &I) {
       if (User->getOperand(1) == &I && isSafeToFoldLoadIntoInstruction(I, *User))
         return;   // Eliminate the load!
+
+      // If this is a floating point sub or div, we won't be able to swap the
+      // operands, but we will still be able to eliminate the load.
+      if (Class == cFP && User->getOperand(0) == &I &&
+          !isa<LoadInst>(User->getOperand(1)) &&
+          (User->getOpcode() == Instruction::Sub ||
+           User->getOpcode() == Instruction::Div) &&
+          isSafeToFoldLoadIntoInstruction(I, *User))
+        return;  // Eliminate the load!
     }
   }

@@ -2453,7 +2761,6 @@ void ISel::visitLoadInst(LoadInst &I) {
   unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
   getAddressingMode(I.getOperand(0), BaseReg, Scale, IndexReg, Disp);

-  unsigned Class = getClassB(I.getType());
   if (Class == cLong) {
     addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg),
                    BaseReg, Scale, IndexReg, Disp);
@@ -2525,11 +2832,18 @@ void ISel::visitStoreInst(StoreInst &I) {
 ///
 void ISel::visitCastInst(CastInst &CI) {
   Value *Op = CI.getOperand(0);
+
+  unsigned SrcClass = getClassB(Op->getType());
+  unsigned DestClass = getClassB(CI.getType());
+  // Noop casts are not emitted: getReg will return the source operand as the
+  // register to use for any uses of the noop cast.
+  if (DestClass == SrcClass)
+    return;
+
   // If this is a cast from a 32-bit integer to a Long type, and the only uses
   // of the case are GEP instructions, then the cast does not need to be
   // generated explicitly, it will be folded into the GEP.
-  if (CI.getType() == Type::LongTy &&
-      (Op->getType() == Type::IntTy || Op->getType() == Type::UIntTy)) {
+  if (DestClass == cLong && SrcClass == cInt) {
     bool AllUsesAreGEPs = true;
     for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I)
       if (!isa<GetElementPtrInst>(*I)) {
@@ -2541,6 +2855,14 @@ void ISel::visitCastInst(CastInst &CI) {
     if (AllUsesAreGEPs) return;
   }

+  // If this cast converts a load from a short,int, or long integer to a FP
+  // value, we will have folded this cast away.
+  if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() &&
+      (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy ||
+       Op->getType() == Type::LongTy))
+    return;
+
+
   unsigned DestReg = getReg(CI);
   MachineBasicBlock::iterator MI = BB->end();
   emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
@@ -2553,10 +2875,10 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
                              MachineBasicBlock::iterator IP,
                              Value *Src, const Type *DestTy,
                              unsigned DestReg) {
-  unsigned SrcReg = getReg(Src, BB, IP);
   const Type *SrcTy = Src->getType();
   unsigned SrcClass = getClassB(SrcTy);
   unsigned DestClass = getClassB(DestTy);
+  unsigned SrcReg = getReg(Src, BB, IP);

   // Implement casts to bool by using compare on the operand followed by set if
   // not zero on the result.
@@ -2670,7 +2992,7 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
   // a larger signed value, then use FLD on the larger value.
   //
   const Type *PromoteType = 0;
-  unsigned PromoteOpcode;
+  unsigned PromoteOpcode = 0;
   unsigned RealDestReg = DestReg;
   switch (SrcTy->getPrimitiveID()) {
   case Type::BoolTyID: