X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FInstSelectSimple.cpp;h=79dbedcdc1111f7624fa51382c76bb79358de75d;hb=832003578edcf93c9ad6869ee40c70c3ce89f415;hp=21b1dedf12a69bad71fda47d341beede7b6b4b81;hpb=548f61d66a4ef198352831a8cdf623d1f9f927e0;p=oota-llvm.git diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index 21b1dedf12a..79dbedcdc11 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -8,32 +8,26 @@ #include "X86InstrInfo.h" #include "X86InstrBuilder.h" #include "llvm/Function.h" -#include "llvm/iTerminators.h" -#include "llvm/iOperators.h" -#include "llvm/iOther.h" -#include "llvm/iPHINode.h" -#include "llvm/iMemory.h" -#include "llvm/Type.h" +#include "llvm/Instructions.h" #include "llvm/DerivedTypes.h" #include "llvm/Constants.h" #include "llvm/Pass.h" +#include "llvm/Intrinsics.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SSARegMap.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Target/MRegisterInfo.h" -#include +#include "llvm/Support/InstVisitor.h" /// BMI - A special BuildMI variant that takes an iterator to insert the -/// instruction at as well as a basic block. -/// this is the version for when you have a destination register in mind. +/// instruction at as well as a basic block. This is the version for when you +/// have a destination register in mind. inline static MachineInstrBuilder BMI(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I, - MachineOpCode Opcode, - unsigned NumOperands, + int Opcode, unsigned NumOperands, unsigned DestReg) { assert(I >= MBB->begin() && I <= MBB->end() && "Bad iterator!"); MachineInstr *MI = new MachineInstr(Opcode, NumOperands+1, true, true); @@ -45,9 +39,8 @@ inline static MachineInstrBuilder BMI(MachineBasicBlock *MBB, /// instruction at as well as a basic block. inline static MachineInstrBuilder BMI(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I, - MachineOpCode Opcode, - unsigned NumOperands) { - assert(I > MBB->begin() && I <= MBB->end() && "Bad iterator!"); + int Opcode, unsigned NumOperands) { + assert(I >= MBB->begin() && I <= MBB->end() && "Bad iterator!"); MachineInstr *MI = new MachineInstr(Opcode, NumOperands, true, true); I = MBB->insert(I, MI)+1; return MachineInstrBuilder(MI); @@ -57,8 +50,9 @@ inline static MachineInstrBuilder BMI(MachineBasicBlock *MBB, namespace { struct ISel : public FunctionPass, InstVisitor { TargetMachine &TM; - MachineFunction *F; // The function we are compiling into - MachineBasicBlock *BB; // The current MBB we are compiling + MachineFunction *F; // The function we are compiling into + MachineBasicBlock *BB; // The current MBB we are compiling + int VarArgsFrameIndex; // FrameIndex for start of varargs area std::map RegMap; // Mapping between Val's and SSA Regs @@ -78,6 +72,8 @@ namespace { F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I)); BB = &F->front(); + + // Copy incoming arguments off of the stack... LoadArgumentsToVirtualRegs(Fn); // Instruction select everything except PHI nodes @@ -89,7 +85,8 @@ namespace { RegMap.clear(); MBBMap.clear(); F = 0; - return false; // We never modify the LLVM itself. + // We always build a machine code representation for the function + return true; } virtual const char *getPassName() const { @@ -132,6 +129,7 @@ namespace { void doCall(const ValueRecord &Ret, MachineInstr *CallMI, const std::vector &Args); void visitCallInst(CallInst &I); + void visitIntrinsicCall(LLVMIntrinsic::ID ID, CallInst &I); // Arithmetic operators void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass); @@ -171,6 +169,7 @@ namespace { void visitShiftInst(ShiftInst &I); void visitPHINode(PHINode &I) {} // PHI nodes handled by second pass void visitCastInst(CastInst &I); + void visitVarArgInst(VarArgInst &I); void visitInstruction(Instruction &I) { std::cerr << "Cannot instruction select: " << I; @@ -197,6 +196,13 @@ namespace { void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator&IP, Value *Src, const Type *DestTy, unsigned TargetReg); + /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary + /// and constant expression support. + void emitSimpleBinaryOperation(MachineBasicBlock *BB, + MachineBasicBlock::iterator &IP, + Value *Op0, Value *Op1, + unsigned OperatorClass, unsigned TargetReg); + /// copyConstantToRegister - Output the instructions required to put the /// specified constant into the specified register. /// @@ -213,9 +219,12 @@ namespace { /// of the long value. /// unsigned makeAnotherReg(const Type *Ty) { + assert(dynamic_cast(TM.getRegisterInfo()) && + "Current target doesn't have X86 reg info??"); + const X86RegisterInfo *MRI = + static_cast(TM.getRegisterInfo()); if (Ty == Type::LongTy || Ty == Type::ULongTy) { - const TargetRegisterClass *RC = - TM.getRegisterInfo()->getRegClassForType(Type::IntTy); + const TargetRegisterClass *RC = MRI->getRegClassForType(Type::IntTy); // Create the lower part F->getSSARegMap()->createVirtualRegister(RC); // Create the upper part. @@ -223,8 +232,7 @@ namespace { } // Add the mapping of regnumber => reg class to MachineFunction - const TargetRegisterClass *RC = - TM.getRegisterInfo()->getRegClassForType(Ty); + const TargetRegisterClass *RC = MRI->getRegClassForType(Ty); return F->getSSARegMap()->createVirtualRegister(RC); } @@ -308,17 +316,29 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, MachineBasicBlock::iterator &IP, Constant *C, unsigned R) { if (ConstantExpr *CE = dyn_cast(C)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { + unsigned Class = 0; + switch (CE->getOpcode()) { + case Instruction::GetElementPtr: emitGEPOperation(MBB, IP, CE->getOperand(0), CE->op_begin()+1, CE->op_end(), R); return; - } else if (CE->getOpcode() == Instruction::Cast) { + case Instruction::Cast: emitCastOperation(MBB, IP, CE->getOperand(0), CE->getType(), R); return; - } - std::cerr << "Offending expr: " << C << "\n"; - assert(0 && "Constant expressions not yet handled!\n"); + case Instruction::Xor: ++Class; // FALL THROUGH + case Instruction::Or: ++Class; // FALL THROUGH + case Instruction::And: ++Class; // FALL THROUGH + case Instruction::Sub: ++Class; // FALL THROUGH + case Instruction::Add: + emitSimpleBinaryOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), + Class, R); + return; + + default: + std::cerr << "Offending expr: " << C << "\n"; + assert(0 && "Constant expressions not yet handled!\n"); + } } if (C->getType()->isIntegral()) { @@ -326,12 +346,7 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, if (Class == cLong) { // Copy the value into the register pair. - uint64_t Val; - if (C->getType()->isSigned()) - Val = cast(C)->getValue(); - else - Val = cast(C)->getValue(); - + uint64_t Val = cast(C)->getRawValue(); BMI(MBB, IP, X86::MOVir32, 1, R).addZImm(Val & 0xFFFFFFFF); BMI(MBB, IP, X86::MOVir32, 1, R+1).addZImm(Val >> 32); return; @@ -345,12 +360,9 @@ void ISel::copyConstantToRegister(MachineBasicBlock *MBB, if (C->getType() == Type::BoolTy) { BMI(MBB, IP, X86::MOVir8, 1, R).addZImm(C == ConstantBool::True); - } else if (C->getType()->isSigned()) { - ConstantSInt *CSI = cast(C); - BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CSI->getValue()); } else { - ConstantUInt *CUI = cast(C); - BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CUI->getValue()); + ConstantInt *CI = cast(C); + BMI(MBB, IP, IntegralOpcodeTab[Class], 1, R).addZImm(CI->getRawValue()); } } else if (ConstantFP *CFP = dyn_cast(C)) { double Value = CFP->getValue(); @@ -432,6 +444,12 @@ void ISel::LoadArgumentsToVirtualRegs(Function &Fn) { } ArgOffset += 4; // Each argument takes at least 4 bytes on the stack... } + + // If the function takes variable number of arguments, add a frame offset for + // the start of the first vararg value... this is used to expand + // llvm.va_start. + if (Fn.getFunctionType()->isVarArg()) + VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); } @@ -462,18 +480,38 @@ void ISel::SelectPHINodes() { MBB->insert(MBB->begin()+NumPHIs++, LongPhiMI); } + // PHIValues - Map of blocks to incoming virtual registers. We use this + // so that we only initialize one incoming value for a particular block, + // even if the block has multiple entries in the PHI node. + // + std::map PHIValues; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)]; + unsigned ValReg; + std::map::iterator EntryIt = + PHIValues.lower_bound(PredMBB); + + if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) { + // We already inserted an initialization of the register for this + // predecessor. Recycle it. + ValReg = EntryIt->second; + + } else { + // Get the incoming value into a virtual register. If it is not + // already available in a virtual register, insert the computation + // code into PredMBB + // + MachineBasicBlock::iterator PI = PredMBB->end(); + while (PI != PredMBB->begin() && + TII.isTerminatorInstr((*(PI-1))->getOpcode())) + --PI; + ValReg = getReg(PN->getIncomingValue(i), PredMBB, PI); + + // Remember that we inserted a value for this PHI for this predecessor + PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg)); + } - // Get the incoming value into a virtual register. If it is not already - // available in a virtual register, insert the computation code into - // PredMBB - // - MachineBasicBlock::iterator PI = PredMBB->end(); - while (PI != PredMBB->begin() && - TII.isTerminatorInstr((*(PI-1))->getOpcode())) - --PI; - unsigned ValReg = getReg(PN->getIncomingValue(i), PredMBB, PI); PhiMI->addRegOperand(ValReg); PhiMI->addMachineBasicBlockOperand(PredMBB); if (LongPhiMI) { @@ -536,26 +574,44 @@ bool ISel::EmitComparisonGetSignedness(unsigned OpNum, Value *Op0, Value *Op1) { // The arguments are already supposed to be of the same type. const Type *CompTy = Op0->getType(); bool isSigned = CompTy->isSigned(); - unsigned reg1 = getReg(Op0); - unsigned reg2 = getReg(Op1); - unsigned Class = getClassB(CompTy); + unsigned Op0r = getReg(Op0); + + // Special case handling of: cmp R, i + if (Class == cByte || Class == cShort || Class == cInt) + if (ConstantInt *CI = dyn_cast(Op1)) { + uint64_t Op1v = cast(CI)->getRawValue(); + + // Mask off any upper bits of the constant, if there are any... + Op1v &= (1ULL << (8 << Class)) - 1; + + switch (Class) { + case cByte: BuildMI(BB, X86::CMPri8, 2).addReg(Op0r).addZImm(Op1v);break; + case cShort: BuildMI(BB, X86::CMPri16,2).addReg(Op0r).addZImm(Op1v);break; + case cInt: BuildMI(BB, X86::CMPri32,2).addReg(Op0r).addZImm(Op1v);break; + default: + assert(0 && "Invalid class!"); + } + return isSigned; + } + + unsigned Op1r = getReg(Op1); switch (Class) { default: assert(0 && "Unknown type class!"); // Emit: cmp , (do the comparison). We can // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with // 32-bit. case cByte: - BuildMI(BB, X86::CMPrr8, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::CMPrr8, 2).addReg(Op0r).addReg(Op1r); break; case cShort: - BuildMI(BB, X86::CMPrr16, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::CMPrr16, 2).addReg(Op0r).addReg(Op1r); break; case cInt: - BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::CMPrr32, 2).addReg(Op0r).addReg(Op1r); break; case cFP: - BuildMI(BB, X86::FpUCOM, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::FpUCOM, 2).addReg(Op0r).addReg(Op1r); BuildMI(BB, X86::FNSTSWr8, 0); BuildMI(BB, X86::SAHF, 1); isSigned = false; // Compare with unsigned operators @@ -566,8 +622,8 @@ bool ISel::EmitComparisonGetSignedness(unsigned OpNum, Value *Op0, Value *Op1) { unsigned LoTmp = makeAnotherReg(Type::IntTy); unsigned HiTmp = makeAnotherReg(Type::IntTy); unsigned FinalTmp = makeAnotherReg(Type::IntTy); - BuildMI(BB, X86::XORrr32, 2, LoTmp).addReg(reg1).addReg(reg2); - BuildMI(BB, X86::XORrr32, 2, HiTmp).addReg(reg1+1).addReg(reg2+1); + BuildMI(BB, X86::XORrr32, 2, LoTmp).addReg(Op0r).addReg(Op1r); + BuildMI(BB, X86::XORrr32, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1); BuildMI(BB, X86::ORrr32, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); break; // Allow the sete or setne to be generated from flags set by OR } else { @@ -584,9 +640,9 @@ bool ISel::EmitComparisonGetSignedness(unsigned OpNum, Value *Op0, Value *Op1) { // classes! Until then, hardcode registers so that we can deal with their // aliases (because we don't have conditional byte moves). // - BuildMI(BB, X86::CMPrr32, 2).addReg(reg1).addReg(reg2); + BuildMI(BB, X86::CMPrr32, 2).addReg(Op0r).addReg(Op1r); BuildMI(BB, SetCCOpcodeTab[0][OpNum], 0, X86::AL); - BuildMI(BB, X86::CMPrr32, 2).addReg(reg1+1).addReg(reg2+1); + BuildMI(BB, X86::CMPrr32, 2).addReg(Op0r+1).addReg(Op1r+1); BuildMI(BB, SetCCOpcodeTab[isSigned][OpNum], 0, X86::BL); BuildMI(BB, X86::CMOVErr16, 2, X86::BX).addReg(X86::BX).addReg(X86::AX); // NOTE: visitSetCondInst knows that the value is dumped into the BL @@ -671,13 +727,19 @@ void ISel::visitReturnInst(ReturnInst &I) { case cShort: case cInt: promote32(X86::EAX, ValueRecord(RetReg, RetVal->getType())); + // Declare that EAX is live on exit + BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP); break; case cFP: // Floats & Doubles: Return in ST(0) BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg); + // Declare that top-of-stack is live on exit + BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP); break; case cLong: BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(RetReg); BuildMI(BB, X86::MOVrr32, 1, X86::EDX).addReg(RetReg+1); + // Declare that EAX & EDX are live on exit + BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX).addReg(X86::ESP); break; default: visitInstruction(I); @@ -868,6 +930,12 @@ void ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, void ISel::visitCallInst(CallInst &CI) { MachineInstr *TheCall; if (Function *F = CI.getCalledFunction()) { + // Is it an intrinsic function call? + if (LLVMIntrinsic::ID ID = (LLVMIntrinsic::ID)F->getIntrinsicID()) { + visitIntrinsicCall(ID, CI); // Special intrinsics are not handled here + return; + } + // Emit a CALL instruction with PC-relative displacement. TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true); } else { // Emit an indirect call... @@ -884,44 +952,112 @@ void ISel::visitCallInst(CallInst &CI) { doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args); } +void ISel::visitIntrinsicCall(LLVMIntrinsic::ID ID, CallInst &CI) { + unsigned TmpReg1, TmpReg2; + switch (ID) { + case LLVMIntrinsic::va_start: + // Get the address of the first vararg value... + TmpReg1 = makeAnotherReg(Type::UIntTy); + addFrameReference(BuildMI(BB, X86::LEAr32, 5, TmpReg1), VarArgsFrameIndex); + TmpReg2 = getReg(CI.getOperand(1)); + addDirectMem(BuildMI(BB, X86::MOVrm32, 5), TmpReg2).addReg(TmpReg1); + return; + + case LLVMIntrinsic::va_end: return; // Noop on X86 + case LLVMIntrinsic::va_copy: + TmpReg1 = getReg(CI.getOperand(2)); // Get existing va_list + TmpReg2 = getReg(CI.getOperand(1)); // Get va_list* to store into + addDirectMem(BuildMI(BB, X86::MOVrm32, 5), TmpReg2).addReg(TmpReg1); + return; + + case LLVMIntrinsic::longjmp: + BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("abort", true); + return; + + case LLVMIntrinsic::setjmp: + // Setjmp always returns zero... + BuildMI(BB, X86::MOVir32, 1, getReg(CI)).addZImm(0); + return; + default: assert(0 && "Unknown intrinsic for X86!"); + } +} + + +/// visitSimpleBinary - Implement simple binary operators for integral types... +/// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for +/// Xor. +void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { + unsigned DestReg = getReg(B); + MachineBasicBlock::iterator MI = BB->end(); + emitSimpleBinaryOperation(BB, MI, B.getOperand(0), B.getOperand(1), + OperatorClass, DestReg); +} /// visitSimpleBinary - Implement simple binary operators for integral types... /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, /// 4 for Xor. /// -void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { - unsigned Class = getClassB(B.getType()); - - static const unsigned OpcodeTab[][4] = { - // Arithmetic operators - { X86::ADDrr8, X86::ADDrr16, X86::ADDrr32, X86::FpADD }, // ADD - { X86::SUBrr8, X86::SUBrr16, X86::SUBrr32, X86::FpSUB }, // SUB +/// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary +/// and constant expression support. +void ISel::emitSimpleBinaryOperation(MachineBasicBlock *BB, + MachineBasicBlock::iterator &IP, + Value *Op0, Value *Op1, + unsigned OperatorClass,unsigned TargetReg){ + unsigned Class = getClassB(Op0->getType()); + if (!isa(Op1) || Class == cLong) { + static const unsigned OpcodeTab[][4] = { + // Arithmetic operators + { X86::ADDrr8, X86::ADDrr16, X86::ADDrr32, X86::FpADD }, // ADD + { X86::SUBrr8, X86::SUBrr16, X86::SUBrr32, X86::FpSUB }, // SUB + + // Bitwise operators + { X86::ANDrr8, X86::ANDrr16, X86::ANDrr32, 0 }, // AND + { X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR + { X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR + }; + + bool isLong = false; + if (Class == cLong) { + isLong = true; + Class = cInt; // Bottom 32 bits are handled just like ints + } + + unsigned Opcode = OpcodeTab[OperatorClass][Class]; + assert(Opcode && "Floating point arguments to logical inst?"); + unsigned Op0r = getReg(Op0, BB, IP); + unsigned Op1r = getReg(Op1, BB, IP); + BMI(BB, IP, Opcode, 2, TargetReg).addReg(Op0r).addReg(Op1r); + + if (isLong) { // Handle the upper 32 bits of long values... + static const unsigned TopTab[] = { + X86::ADCrr32, X86::SBBrr32, X86::ANDrr32, X86::ORrr32, X86::XORrr32 + }; + BMI(BB, IP, TopTab[OperatorClass], 2, + TargetReg+1).addReg(Op0r+1).addReg(Op1r+1); + } + } else { + // Special case: op Reg, + ConstantInt *Op1C = cast(Op1); - // Bitwise operators - { X86::ANDrr8, X86::ANDrr16, X86::ANDrr32, 0 }, // AND - { X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR - { X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR - }; + static const unsigned OpcodeTab[][3] = { + // Arithmetic operators + { X86::ADDri8, X86::ADDri16, X86::ADDri32 }, // ADD + { X86::SUBri8, X86::SUBri16, X86::SUBri32 }, // SUB + + // Bitwise operators + { X86::ANDri8, X86::ANDri16, X86::ANDri32 }, // AND + { X86:: ORri8, X86:: ORri16, X86:: ORri32 }, // OR + { X86::XORri8, X86::XORri16, X86::XORri32 }, // XOR + }; - bool isLong = false; - if (Class == cLong) { - isLong = true; - Class = cInt; // Bottom 32 bits are handled just like ints - } - - unsigned Opcode = OpcodeTab[OperatorClass][Class]; - assert(Opcode && "Floating point arguments to logical inst?"); - unsigned Op0r = getReg(B.getOperand(0)); - unsigned Op1r = getReg(B.getOperand(1)); - unsigned DestReg = getReg(B); - BuildMI(BB, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); + assert(Class < 3 && "General code handles 64-bit integer types!"); + unsigned Opcode = OpcodeTab[OperatorClass][Class]; + unsigned Op0r = getReg(Op0, BB, IP); + uint64_t Op1v = cast(Op1C)->getRawValue(); - if (isLong) { // Handle the upper 32 bits of long values... - static const unsigned TopTab[] = { - X86::ADCrr32, X86::SBBrr32, X86::ANDrr32, X86::ORrr32, X86::XORrr32 - }; - BuildMI(BB, TopTab[OperatorClass], 2, - DestReg+1).addReg(Op0r+1).addReg(Op1r+1); + // Mask off any upper bits of the constant, if there are any... + Op1v &= (1ULL << (8 << Class)) - 1; + BMI(BB, IP, Opcode, 2, TargetReg).addReg(Op0r).addZImm(Op1v); } } @@ -929,8 +1065,6 @@ void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { /// registers op0Reg and op1Reg, and put the result in DestReg. The type of the /// result should be given as DestTy. /// -/// FIXME: doMultiply should use one of the two address IMUL instructions! -/// void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, unsigned DestReg, const Type *DestTy, unsigned op0Reg, unsigned op1Reg) { @@ -939,28 +1073,20 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator &MBBI, case cFP: // Floating point multiply BMI(BB, MBBI, X86::FpMUL, 2, DestReg).addReg(op0Reg).addReg(op1Reg); return; + case cInt: + case cShort: + BMI(BB, MBBI, Class == cInt ? X86::IMULr32 : X86::IMULr16, 2, DestReg) + .addReg(op0Reg).addReg(op1Reg); + return; + case cByte: + // Must use the MUL instruction, which forces use of AL... + BMI(MBB, MBBI, X86::MOVrr8, 1, X86::AL).addReg(op0Reg); + BMI(MBB, MBBI, X86::MULr8, 1).addReg(op1Reg); + BMI(MBB, MBBI, X86::MOVrr8, 1, DestReg).addReg(X86::AL); + return; default: case cLong: assert(0 && "doMultiply cannot operate on LONG values!"); - case cByte: - case cShort: - case cInt: // Small integerals, handled below... - break; } - - static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; - static const unsigned MulOpcode[]={ X86::MULr8 , X86::MULr16 , X86::MULr32 }; - static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 }; - unsigned Reg = Regs[Class]; - - // Emit a MOV to put the first operand into the appropriately-sized - // subreg of EAX. - BMI(MBB, MBBI, MovOpcode[Class], 1, Reg).addReg(op0Reg); - - // Emit the appropriate multiply instruction. - BMI(MBB, MBBI, MulOpcode[Class], 1).addReg(op1Reg); - - // Emit another MOV to put the result into the destination register. - BMI(MBB, MBBI, MovOpcode[Class], 1, DestReg).addReg(Reg); } /// visitMul - Multiplies are not simple binary operators because they must deal @@ -986,16 +1112,16 @@ void ISel::visitMul(BinaryOperator &I) { BuildMI(BB, X86::MOVrr32, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 MachineBasicBlock::iterator MBBI = BB->end(); - unsigned AHBLReg = makeAnotherReg(Type::UIntTy); - doMultiply(BB, MBBI, AHBLReg, Type::UIntTy, Op0Reg+1, Op1Reg); // AH*BL + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BMI(BB, MBBI, X86::IMULr32, 2, AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); BuildMI(BB, X86::ADDrr32, 2, // AH*BL+(AL*BL >> 32) AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); MBBI = BB->end(); - unsigned ALBHReg = makeAnotherReg(Type::UIntTy); - doMultiply(BB, MBBI, ALBHReg, Type::UIntTy, Op0Reg, Op1Reg+1); // AL*BH + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BMI(BB, MBBI, X86::IMULr32, 2, ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); BuildMI(BB, X86::ADDrr32, 2, // AL*BH + AH*BL + (AL*BL >> 32) DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); @@ -1049,7 +1175,7 @@ void ISel::visitDivRem(BinaryOperator &I) { static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 }; - static const unsigned ExtOpcode[]={ X86::CBW , X86::CWD , X86::CDQ }; + static const unsigned SarOpcode[]={ X86::SARir8, X86::SARir16, X86::SARir32 }; static const unsigned ClrOpcode[]={ X86::XORrr8, X86::XORrr16, X86::XORrr32 }; static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX }; @@ -1067,7 +1193,9 @@ void ISel::visitDivRem(BinaryOperator &I) { if (isSigned) { // Emit a sign extension instruction... - BuildMI(BB, ExtOpcode[Class], 0); + unsigned ShiftResult = makeAnotherReg(I.getType()); + BuildMI(BB, SarOpcode[Class], 2, ShiftResult).addReg(Op0Reg).addZImm(31); + BuildMI(BB, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult); } else { // If unsigned, emit a zeroing instruction... (reg = xor reg, reg) BuildMI(BB, ClrOpcode[Class], 2, ExtReg).addReg(ExtReg).addReg(ExtReg); @@ -1140,7 +1268,57 @@ void ISel::visitShiftInst(ShiftInst &I) { } } } else { - visitInstruction(I); // FIXME: Implement long shift by non-constant + unsigned TmpReg = makeAnotherReg(Type::IntTy); + + if (!isLeftShift && isSigned) { + // If this is a SHR of a Long, then we need to do funny sign extension + // stuff. TmpReg gets the value to use as the high-part if we are + // shifting more than 32 bits. + BuildMI(BB, X86::SARir32, 2, TmpReg).addReg(SrcReg).addZImm(31); + } else { + // Other shifts use a fixed zero value if the shift is more than 32 + // bits. + BuildMI(BB, X86::MOVir32, 1, TmpReg).addZImm(0); + } + + // Initialize CL with the shift amount... + unsigned ShiftAmount = getReg(I.getOperand(1)); + BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(ShiftAmount); + + unsigned TmpReg2 = makeAnotherReg(Type::IntTy); + unsigned TmpReg3 = makeAnotherReg(Type::IntTy); + if (isLeftShift) { + // TmpReg2 = shld inHi, inLo + BuildMI(BB, X86::SHLDrr32, 2, TmpReg2).addReg(SrcReg+1).addReg(SrcReg); + // TmpReg3 = shl inLo, CL + BuildMI(BB, X86::SHLrr32, 1, TmpReg3).addReg(SrcReg); + + // Set the flags to indicate whether the shift was by more than 32 bits. + BuildMI(BB, X86::TESTri8, 2).addReg(X86::CL).addZImm(32); + + // DestHi = (>32) ? TmpReg3 : TmpReg2; + BuildMI(BB, X86::CMOVNErr32, 2, + DestReg+1).addReg(TmpReg2).addReg(TmpReg3); + // DestLo = (>32) ? TmpReg : TmpReg3; + BuildMI(BB, X86::CMOVNErr32, 2, DestReg).addReg(TmpReg3).addReg(TmpReg); + } else { + // TmpReg2 = shrd inLo, inHi + BuildMI(BB, X86::SHRDrr32, 2, TmpReg2).addReg(SrcReg).addReg(SrcReg+1); + // TmpReg3 = s[ah]r inHi, CL + BuildMI(BB, isSigned ? X86::SARrr32 : X86::SHRrr32, 1, TmpReg3) + .addReg(SrcReg+1); + + // Set the flags to indicate whether the shift was by more than 32 bits. + BuildMI(BB, X86::TESTri8, 2).addReg(X86::CL).addZImm(32); + + // DestLo = (>32) ? TmpReg3 : TmpReg2; + BuildMI(BB, X86::CMOVNErr32, 2, + DestReg).addReg(TmpReg2).addReg(TmpReg3); + + // DestHi = (>32) ? TmpReg : TmpReg3; + BuildMI(BB, X86::CMOVNErr32, 2, + DestReg+1).addReg(TmpReg3).addReg(TmpReg); + } } return; } @@ -1245,7 +1423,7 @@ void ISel::visitLoadInst(LoadInst &I) { unsigned SrcAddrReg = getReg(I.getOperand(0)); unsigned DestReg = getReg(I); - unsigned Class = getClass(I.getType()); + unsigned Class = getClassB(I.getType()); switch (Class) { case cFP: { MachineBasicBlock::iterator MBBI = BB->end(); @@ -1345,7 +1523,7 @@ void ISel::visitStoreInst(StoreInst &I) { unsigned ValReg = getReg(I.getOperand(0)); unsigned AddressReg = getReg(I.getOperand(1)); - unsigned Class = getClass(I.getOperand(0)->getType()); + unsigned Class = getClassB(I.getOperand(0)->getType()); switch (Class) { case cLong: if (isLittleEndian) { @@ -1390,9 +1568,26 @@ void ISel::visitStoreInst(StoreInst &I) { /// visitCastInst - Here we have various kinds of copying with or without /// sign extension going on. void ISel::visitCastInst(CastInst &CI) { + Value *Op = CI.getOperand(0); + // If this is a cast from a 32-bit integer to a Long type, and the only uses + // of the case are GEP instructions, then the cast does not need to be + // generated explicitly, it will be folded into the GEP. + if (CI.getType() == Type::LongTy && + (Op->getType() == Type::IntTy || Op->getType() == Type::UIntTy)) { + bool AllUsesAreGEPs = true; + for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I) + if (!isa(*I)) { + AllUsesAreGEPs = false; + break; + } + + // No need to codegen this cast if all users are getelementptr instrs... + if (AllUsesAreGEPs) return; + } + unsigned DestReg = getReg(CI); MachineBasicBlock::iterator MI = BB->end(); - emitCastOperation(BB, MI, CI.getOperand(0), CI.getType(), DestReg); + emitCastOperation(BB, MI, Op, CI.getType(), DestReg); } /// emitCastOperation - Common code shared between visitCastInst and @@ -1401,7 +1596,7 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator &IP, Value *Src, const Type *DestTy, unsigned DestReg) { - unsigned SrcReg = getReg(Src); + unsigned SrcReg = getReg(Src, BB, IP); const Type *SrcTy = Src->getType(); unsigned SrcClass = getClassB(SrcTy); unsigned DestClass = getClassB(DestTy); @@ -1409,10 +1604,28 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, // Implement casts to bool by using compare on the operand followed by set if // not zero on the result. if (DestTy == Type::BoolTy) { - if (SrcClass == cFP || SrcClass == cLong) - abort(); // FIXME: implement cast (long & FP) to bool - - BMI(BB, IP, X86::CMPri8, 2).addReg(SrcReg).addZImm(0); + switch (SrcClass) { + case cByte: + BMI(BB, IP, X86::TESTrr8, 2).addReg(SrcReg).addReg(SrcReg); + break; + case cShort: + BMI(BB, IP, X86::TESTrr16, 2).addReg(SrcReg).addReg(SrcReg); + break; + case cInt: + BMI(BB, IP, X86::TESTrr32, 2).addReg(SrcReg).addReg(SrcReg); + break; + case cLong: { + unsigned TmpReg = makeAnotherReg(Type::IntTy); + BMI(BB, IP, X86::ORrr32, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1); + break; + } + case cFP: + assert(0 && "FIXME: implement cast FP to bool"); + abort(); + } + + // If the zero flag is not set, then the value is true, set the byte to + // true. BMI(BB, IP, X86::SETNEr, 1, DestReg); return; } @@ -1444,6 +1657,7 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, BMI(BB, IP, X86::MOVrr32, 1, DestReg).addReg(SrcReg); BMI(BB, IP, X86::MOVrr32, 1, DestReg+1).addReg(SrcReg+1); } else { + assert(0 && "Cannot handle this type of cast instruction!"); abort(); } return; @@ -1492,19 +1706,50 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, // Handle casts from integer to floating point now... if (DestClass == cFP) { - // unsigned int -> load as 64 bit int. - // unsigned long long -> more complex - if (SrcTy->isUnsigned() && SrcTy != Type::UByteTy) - abort(); // don't handle unsigned src yet! - - // We don't have the facilities for directly loading byte sized data from - // memory. Promote it to 16 bits. - if (SrcClass == cByte) { - unsigned TmpReg = makeAnotherReg(Type::ShortTy); + // Promote the integer to a type supported by FLD. We do this because there + // are no unsigned FLD instructions, so we must promote an unsigned value to + // a larger signed value, then use FLD on the larger value. + // + const Type *PromoteType = 0; + unsigned PromoteOpcode; + switch (SrcTy->getPrimitiveID()) { + case Type::BoolTyID: + case Type::SByteTyID: + // We don't have the facilities for directly loading byte sized data from + // memory (even signed). Promote it to 16 bits. + PromoteType = Type::ShortTy; + PromoteOpcode = X86::MOVSXr16r8; + break; + case Type::UByteTyID: + PromoteType = Type::ShortTy; + PromoteOpcode = X86::MOVZXr16r8; + break; + case Type::UShortTyID: + PromoteType = Type::IntTy; + PromoteOpcode = X86::MOVZXr32r16; + break; + case Type::UIntTyID: { + // Make a 64 bit temporary... and zero out the top of it... + unsigned TmpReg = makeAnotherReg(Type::LongTy); + BMI(BB, IP, X86::MOVrr32, 1, TmpReg).addReg(SrcReg); + BMI(BB, IP, X86::MOVir32, 1, TmpReg+1).addZImm(0); + SrcTy = Type::LongTy; + SrcClass = cLong; + SrcReg = TmpReg; + break; + } + case Type::ULongTyID: + assert("FIXME: not implemented: cast ulong X to fp type!"); + default: // No promotion needed... + break; + } + + if (PromoteType) { + unsigned TmpReg = makeAnotherReg(PromoteType); BMI(BB, IP, SrcTy->isSigned() ? X86::MOVSXr16r8 : X86::MOVZXr16r8, 1, TmpReg).addReg(SrcReg); - SrcTy = Type::ShortTy; // Pretend the short is our input now! - SrcClass = cShort; + SrcTy = PromoteType; + SrcClass = getClass(PromoteType); SrcReg = TmpReg; } @@ -1513,7 +1758,6 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData()); if (SrcClass == cLong) { - if (SrcTy == Type::ULongTy) abort(); // FIXME: Handle ulong -> FP addFrameReference(BMI(BB, IP, X86::MOVrm32, 5), FrameIdx).addReg(SrcReg); addFrameReference(BMI(BB, IP, X86::MOVrm32, 5), FrameIdx, 4).addReg(SrcReg+1); @@ -1523,7 +1767,7 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, } static const unsigned Op2[] = - { 0, X86::FILDr16, X86::FILDr32, 0, X86::FILDr64 }; + { 0/*byte*/, X86::FILDr16, X86::FILDr32, 0/*FP*/, X86::FILDr64 }; addFrameReference(BMI(BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx); return; } @@ -1560,7 +1804,10 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, case cByte: StoreTy = Type::ShortTy; StoreClass = cShort; break; case cShort: StoreTy = Type::IntTy; StoreClass = cInt; break; case cInt: StoreTy = Type::LongTy; StoreClass = cLong; break; - case cLong: abort(); // FIXME: unsigned long long -> more complex + // The following treatment of cLong may not be perfectly right, + // but it survives chains of casts of the form + // double->ulong->double. + case cLong: StoreTy = Type::LongTy; StoreClass = cLong; break; default: assert(0 && "Unknown store class!"); } @@ -1586,9 +1833,53 @@ void ISel::emitCastOperation(MachineBasicBlock *BB, } // Anything we haven't handled already, we can't (yet) handle at all. + assert(0 && "Unhandled cast instruction!"); abort(); } +/// visitVarArgInst - Implement the va_arg instruction... +/// +void ISel::visitVarArgInst(VarArgInst &I) { + unsigned SrcReg = getReg(I.getOperand(0)); + unsigned DestReg = getReg(I); + + // Load the va_list into a register... + unsigned VAList = makeAnotherReg(Type::UIntTy); + addDirectMem(BuildMI(BB, X86::MOVmr32, 4, VAList), SrcReg); + + unsigned Size; + switch (I.getType()->getPrimitiveID()) { + default: + std::cerr << I; + assert(0 && "Error: bad type for va_arg instruction!"); + return; + case Type::PointerTyID: + case Type::UIntTyID: + case Type::IntTyID: + Size = 4; + addDirectMem(BuildMI(BB, X86::MOVmr32, 4, DestReg), VAList); + break; + case Type::ULongTyID: + case Type::LongTyID: + Size = 8; + addDirectMem(BuildMI(BB, X86::MOVmr32, 4, DestReg), VAList); + addRegOffset(BuildMI(BB, X86::MOVmr32, 4, DestReg+1), VAList, 4); + break; + case Type::DoubleTyID: + Size = 8; + addDirectMem(BuildMI(BB, X86::FLDr64, 4, DestReg), VAList); + break; + } + + // Increment the VAList pointer... + unsigned NextVAList = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::ADDri32, 2, NextVAList).addReg(VAList).addZImm(Size); + + // Update the VAList in memory... + addDirectMem(BuildMI(BB, X86::MOVrm32, 5), SrcReg).addReg(NextVAList); +} + + // ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It // returns zero when the input is not exactly a power of two. static unsigned ExactLog2(unsigned Val) { @@ -1652,6 +1943,13 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB, // time. assert(idx->getType() == Type::LongTy && "Bad GEP array index!"); + // Most GEP instructions use a [cast (int/uint) to LongTy] as their + // operand on X86. Handle this case directly now... + if (CastInst *CI = dyn_cast(idx)) + if (CI->getOperand(0)->getType() == Type::IntTy || + CI->getOperand(0)->getType() == Type::UIntTy) + idx = CI->getOperand(0); + // We want to add BaseReg to(idxReg * sizeof ElementType). First, we // must find the size of the pointed-to type (Not coincidentally, the next // type is the type of the elements in the array). @@ -1752,7 +2050,7 @@ void ISel::visitAllocaInst(AllocaInst &I) { // the stack pointer. BuildMI(BB, X86::MOVrr32, 1, getReg(I)).addReg(X86::ESP); - // Inform the Frame Information that we have just allocated a variable sized + // Inform the Frame Information that we have just allocated a variable-sized // object. F->getFrameInfo()->CreateVariableSizedObject(); } @@ -1772,8 +2070,6 @@ void ISel::visitMallocInst(MallocInst &I) { unsigned Op1Reg = getReg(I.getOperand(0)); MachineBasicBlock::iterator MBBI = BB->end(); doMultiply(BB, MBBI, Arg, Type::UIntTy, Op0Reg, Op1Reg); - - } std::vector Args; @@ -1797,10 +2093,10 @@ void ISel::visitFreeInst(FreeInst &I) { } -/// createSimpleX86InstructionSelector - This pass converts an LLVM function +/// createX86SimpleInstructionSelector - This pass converts an LLVM function /// into a machine code representation is a very simple peep-hole fashion. The /// generated code sucks but the implementation is nice and simple. /// -Pass *createSimpleX86InstructionSelector(TargetMachine &TM) { +Pass *createX86SimpleInstructionSelector(TargetMachine &TM) { return new ISel(TM); }