1 //===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
3 // This file defines a simple peephole instruction selector for the x86 platform
5 //===----------------------------------------------------------------------===//
8 #include "X86InstrInfo.h"
9 #include "X86InstrBuilder.h"
10 #include "llvm/Function.h"
11 #include "llvm/iTerminators.h"
12 #include "llvm/iOperators.h"
13 #include "llvm/iOther.h"
14 #include "llvm/iPHINode.h"
15 #include "llvm/iMemory.h"
16 #include "llvm/Type.h"
17 #include "llvm/Constants.h"
18 #include "llvm/Pass.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/Target/TargetMachine.h"
22 #include "llvm/Support/InstVisitor.h"
23 #include "llvm/Target/MRegisterInfo.h"
26 using namespace MOTy; // Get Use, Def, UseAndDef
// ISel - A FunctionPass that is also an InstVisitor: it builds a
// MachineFunction for each LLVM function and emits X86 machine instructions
// into it through the visit* callbacks declared below.
// NOTE(review): the embedded line numbers in this listing skip, so some
// original lines are not visible here (for example the declarations of the TM
// and CurReg members that the constructor initializes, and closing braces).
29 struct ISel : public FunctionPass, InstVisitor<ISel> {
31 MachineFunction *F; // The function we are compiling into
32 MachineBasicBlock *BB; // The current MBB we are compiling
35 std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
// Construct with no current function or block; CurReg starts at
// MRegisterInfo::FirstVirtualRegister.
37 ISel(TargetMachine &tm)
38 : TM(tm), F(0), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {}
40 /// runOnFunction - Top level implementation of instruction selection for
41 /// the entire function.
// Stores the MachineFunction constructed for Fn in F and returns false.
// NOTE(review): the lines between the construct call and the return are
// missing from this listing; presumably they drive the visitor over Fn.
43 bool runOnFunction(Function &Fn) {
44 F = &MachineFunction::construct(&Fn, TM);
48 return false; // We never modify the LLVM itself.
51 /// visitBasicBlock - This method is called when we are visiting a new basic
52 /// block. This simply creates a new MachineBasicBlock to emit code into
53 /// and adds it to the current MachineFunction. Subsequent visit* for
54 /// instructions will be invoked for all instructions in the basic block.
56 void visitBasicBlock(BasicBlock &LLVM_BB) {
// BB becomes the insertion target used by every BuildMI(BB, ...) call.
57 BB = new MachineBasicBlock(&LLVM_BB);
58 // FIXME: Use the auto-insert form when it's available
59 F->getBasicBlockList().push_back(BB);
62 // Visitation methods for various instructions. These methods simply emit
63 // fixed X86 code for each instruction.
65 void visitReturnInst(ReturnInst &RI);
66 void visitBranchInst(BranchInst &BI);
68 // Arithmetic operators
// The second argument to visitSimpleBinary selects the operator row:
// 0 = Add, 1 = Sub, 2 = And, 3 = Or, 4 = Xor.
69 void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
70 void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
71 void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
72 void visitMul(BinaryOperator &B);
// Div and Rem share one implementation; visitDivRem inspects the opcode to
// decide which result register to copy out.
74 void visitDiv(BinaryOperator &B) { visitDivRem(B); }
75 void visitRem(BinaryOperator &B) { visitDivRem(B); }
76 void visitDivRem(BinaryOperator &B);
// Bitwise logical operators
79 void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); }
80 void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); }
81 void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); }
83 // Binary comparison operators
84 void visitSetCondInst(SetCondInst &I);
86 // Memory Instructions
87 void visitLoadInst(LoadInst &I);
88 void visitStoreInst(StoreInst &I);
// Other operators
91 void visitShiftInst(ShiftInst &I);
92 void visitPHINode(PHINode &I);
// Fallback for instructions without a dedicated visit method: prints a
// diagnostic to std::cerr.
// NOTE(review): the remainder of this body is missing from the listing;
// callers below appear to rely on it not returning -- confirm.
94 void visitInstruction(Instruction &I) {
95 std::cerr << "Cannot instruction select: " << I;
100 /// copyConstantToRegister - Output the instructions required to put the
101 /// specified constant into the specified register.
103 void copyConstantToRegister(Constant *C, unsigned Reg);
105 /// getReg - This method turns an LLVM value into a register number. This
106 /// is guaranteed to produce the same register number for a particular value
107 /// every time it is queried.
109 unsigned getReg(Value &V) { return getReg(&V); } // Allow references
110 unsigned getReg(Value *V) {
// RegMap[V] creates a map entry for V if none exists; Reg is a reference into
// the map, so assigning to it records the register for later queries.
// NOTE(review): the lines that test and assign Reg are missing from this
// listing; presumably a fresh virtual register is taken from CurReg.
111 unsigned &Reg = RegMap[V];
116 // Add the mapping of regnumber => reg class to MachineFunction
118 TM.getRegisterInfo()->getRegClassForType(V->getType()));
121 // If this operand is a constant, emit the code to copy the constant into
122 // the register here...
124 if (Constant *C = dyn_cast<Constant>(V))
125 copyConstantToRegister(C, Reg);
132 /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
// NOTE(review): the rest of this comment and the enum header are missing from
// this listing. The enumerator order matters: the values are used as indices
// into per-size opcode tables and are compared against literals such as 2 and
// 3 elsewhere in this file.
136 cByte, cShort, cInt, cLong, cFloat, cDouble
139 /// getClass - Turn a primitive type into a "class" number which is based on the
140 /// size of the type, and whether or not it is floating point.
// Pointers are placed in the same class as ints (cInt). If no case matches,
// the assert fires; the trailing return only exists to give the function a
// value on that path.
// NOTE(review): several case labels (the int and signed-long ones, and the
// default label) are missing from this listing.
142 static inline TypeClass getClass(const Type *Ty) {
143 switch (Ty->getPrimitiveID()) {
144 case Type::SByteTyID:
145 case Type::UByteTyID: return cByte; // Byte operands are class #0
146 case Type::ShortTyID:
147 case Type::UShortTyID: return cShort; // Short operands are class #1
150 case Type::PointerTyID: return cInt; // Int's and pointers are class #2
153 case Type::ULongTyID: return cLong; // Longs are class #3
154 case Type::FloatTyID: return cFloat; // Float is class #4
155 case Type::DoubleTyID: return cDouble; // Doubles are class #5
157 assert(0 && "Invalid type to getClass!");
158 return cByte; // not reached
163 /// copyConstantToRegister - Output the instructions required to put the
164 /// specified constant into the specified register.
// Only integral constants of class cByte/cShort/cInt are handled; constant
// expressions, longs (class 3) and non-integral constants hit an assert.
// NOTE(review): the else lines and closing braces are missing from this
// listing, so the two BuildMI calls below are alternatives, not a sequence.
166 void ISel::copyConstantToRegister(Constant *C, unsigned R) {
167 assert (!isa<ConstantExpr>(C) && "Constant expressions not yet handled!\n");
169 if (C->getType()->isIntegral()) {
170 unsigned Class = getClass(C->getType());
171 assert(Class != 3 && "Type not handled yet!");
// Move-immediate opcodes indexed by type class (8, 16, 32 bit).
173 static const unsigned IntegralOpcodeTab[] = {
174 X86::MOVir8, X86::MOVir16, X86::MOVir32
// Signed constants are added as a signed immediate, the others as a
// zero-extended immediate.
177 if (C->getType()->isSigned()) {
178 ConstantSInt *CSI = cast<ConstantSInt>(C);
179 BuildMI(BB, IntegralOpcodeTab[Class], 1, R).addSImm(CSI->getValue());
181 ConstantUInt *CUI = cast<ConstantUInt>(C);
182 BuildMI(BB, IntegralOpcodeTab[Class], 1, R).addZImm(CUI->getValue());
// Reached for non-integral constants.
185 assert(0 && "Type not handled yet!");
190 /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
191 /// register, then move it to wherever the result should be.
192 /// We handle FP setcc instructions by pushing them, doing a
193 /// compare-and-pop-twice, and then copying the concodes to the main
194 /// processor's concodes (I didn't make this up, it's in the Intel manual)
// The emitted code has three stages: (1) a compare that sets EFLAGS (FP or
// integer), (2) a SETcc into AL chosen by opcode and signedness, (3) a move or
// zero-extending move of AL into the result register, chosen by result width.
// NOTE(review): the case labels, if/else lines, breaks and braces are missing
// from this listing; the visible BuildMI calls inside each switch are
// alternatives selected by width, not a straight-line sequence.
197 ISel::visitSetCondInst (SetCondInst & I)
199 // The arguments are already supposed to be of the same type.
200 Value *var1 = I.getOperand (0);
201 Value *var2 = I.getOperand (1);
202 unsigned reg1 = getReg (var1);
203 unsigned reg2 = getReg (var2);
204 unsigned resultReg = getReg (I);
// Widths come from getPrimitiveSize(); signedness and FP-ness are taken from
// the first operand only.
205 unsigned comparisonWidth = var1->getType ()->getPrimitiveSize ();
206 unsigned unsignedComparison = var1->getType ()->isUnsigned ();
207 unsigned resultWidth = I.getType ()->getPrimitiveSize ();
208 bool fpComparison = var1->getType ()->isFloatingPoint ();
211 // Push the variables on the stack with fldl opcodes.
212 // FIXME: assuming var1, var2 are in memory, if not, spill to
// First operand: FLDr4 or FLDr8 depending on comparisonWidth; any other width
// falls back to visitInstruction.
214 switch (comparisonWidth)
217 BuildMI (BB, X86::FLDr4, 1, X86::NoReg).addReg (reg1);
220 BuildMI (BB, X86::FLDr8, 1, X86::NoReg).addReg (reg1);
223 visitInstruction (I);
// Second operand: same selection as above.
226 switch (comparisonWidth)
229 BuildMI (BB, X86::FLDr4, 1, X86::NoReg).addReg (reg2);
232 BuildMI (BB, X86::FLDr8, 1, X86::NoReg).addReg (reg2);
235 visitInstruction (I);
238 // (Non-trapping) compare and pop twice.
239 BuildMI (BB, X86::FUCOMPP, 0);
240 // Move fp status word (concodes) to ax.
241 BuildMI (BB, X86::FNSTSWr8, 1, X86::AX);
242 // Load real concodes from ax.
243 BuildMI (BB, X86::SAHF, 1, X86::EFLAGS).addReg(X86::AH);
// NOTE(review): for FP operands unsignedComparison is presumably false, so the
// signed SETcc variants below are chosen after SAHF -- confirm that these test
// the flags SAHF actually loads.
246 { // integer comparison
247 // Emit: cmp <var1>, <var2> (do the comparison). We can
248 // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
// Register-register compare selected by comparisonWidth; unsupported widths
// fall back to visitInstruction.
250 switch (comparisonWidth)
253 BuildMI (BB, X86::CMPrr8, 2,
254 X86::EFLAGS).addReg (reg1).addReg (reg2);
257 BuildMI (BB, X86::CMPrr16, 2,
258 X86::EFLAGS).addReg (reg1).addReg (reg2);
261 BuildMI (BB, X86::CMPrr32, 2,
262 X86::EFLAGS).addReg (reg1).addReg (reg2);
266 visitInstruction (I);
270 // Emit setOp instruction (extract concode; clobbers ax),
271 // using the following mapping:
272 // LLVM -> X86 signed X86 unsigned
274 // seteq -> sete sete
275 // setne -> setne setne
276 // setlt -> setl setb
277 // setgt -> setg seta
278 // setle -> setle setbe
279 // setge -> setge setae
// In each ordered case the first BuildMI is the unsigned form and the second
// is the signed form.
280 switch (I.getOpcode ())
282 case Instruction::SetEQ:
283 BuildMI (BB, X86::SETE, 0, X86::AL);
285 case Instruction::SetGE:
286 if (unsignedComparison)
287 BuildMI (BB, X86::SETAE, 0, X86::AL);
289 BuildMI (BB, X86::SETGE, 0, X86::AL);
291 case Instruction::SetGT:
292 if (unsignedComparison)
293 BuildMI (BB, X86::SETA, 0, X86::AL);
295 BuildMI (BB, X86::SETG, 0, X86::AL);
297 case Instruction::SetLE:
298 if (unsignedComparison)
299 BuildMI (BB, X86::SETBE, 0, X86::AL);
301 BuildMI (BB, X86::SETLE, 0, X86::AL);
303 case Instruction::SetLT:
304 if (unsignedComparison)
305 BuildMI (BB, X86::SETB, 0, X86::AL);
307 BuildMI (BB, X86::SETL, 0, X86::AL);
309 case Instruction::SetNE:
310 BuildMI (BB, X86::SETNE, 0, X86::AL);
313 visitInstruction (I);
316 // Put it in the result using a move.
// Plain 8-bit move, or zero-extension of AL to 16 or 32 bits; the switch on
// the result width is not visible in this listing (presumably resultWidth).
320 BuildMI (BB, X86::MOVrr8, 1, resultReg).addReg (X86::AL);
323 BuildMI (BB, X86::MOVZXr16r8, 1, resultReg).addReg (X86::AL);
326 BuildMI (BB, X86::MOVZXr32r8, 1, resultReg).addReg (X86::AL);
330 visitInstruction (I);
336 /// 'ret' instruction - Here we are interested in meeting the x86 ABI. As such,
337 /// we have the following possibilities:
339 /// ret void: No return value, simply emit a 'ret' instruction
340 /// ret sbyte, ubyte : Extend value into EAX and return
341 /// ret short, ushort: Extend value into EAX and return
342 /// ret int, uint : Move value into EAX and return
343 /// ret pointer : Move value into EAX and return
344 /// ret long, ulong : Move value into EAX/EDX and return
345 /// ret float/double : Top of FP stack
// NOTE(review): the switch header, most case labels, the isUnsigned tests and
// the breaks are missing from this listing. Within each pair of extension
// calls below, the MOVZX form is presumably the unsigned path and the MOVSX
// form the signed one.
347 void ISel::visitReturnInst (ReturnInst &I) {
348 if (I.getNumOperands() == 0) {
349 // Emit a 'ret' instruction
350 BuildMI(BB, X86::RET, 0);
// Non-void return: materialize the value in a register and classify its type.
354 unsigned val = getReg(I.getOperand(0));
355 unsigned Class = getClass(I.getOperand(0)->getType());
356 bool isUnsigned = I.getOperand(0)->getType()->isUnsigned();
359 // ret sbyte, ubyte: Extend value into EAX and return
361 BuildMI (BB, X86::MOVZXr32r8, 1, X86::EAX).addReg (val);
363 BuildMI (BB, X86::MOVSXr32r8, 1, X86::EAX).addReg (val);
366 // ret short, ushort: Extend value into EAX and return
368 BuildMI (BB, X86::MOVZXr32r16, 1, X86::EAX).addReg (val);
370 BuildMI (BB, X86::MOVSXr32r16, 1, X86::EAX).addReg (val);
373 // ret int, uint, ptr: Move value into EAX and return
375 BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(val);
378 // ret float/double: top of FP stack
380 case cFloat: // Floats
381 BuildMI(BB, X86::FLDr4, 1).addReg(val);
383 case cDouble: // Doubles
384 BuildMI(BB, X86::FLDr8, 1).addReg(val);
387 // ret long: use EAX(least significant 32 bits)/EDX (most
388 // significant 32)...uh, I think so Brain, but how do i call
389 // up the two parts of the value from inside this mouse
// No code for the long case is visible in this listing.
395 // Emit a 'ret' instruction
396 BuildMI(BB, X86::RET, 0);
399 /// visitBranchInst - Handle conditional and unconditional branches here. Note
400 /// that since code layout is frozen at this point, that if we are trying to
401 /// jump to a block that is the immediate successor of the current block, we can
402 /// just make a fall-through. (but we don't currently).
// Conditional branches become "cmp cond, 0; jne succ0; je succ1";
// unconditional branches become a single jmp to successor 0.
405 ISel::visitBranchInst (BranchInst & BI)
407 if (BI.isConditional ())
// ifTrue and ifFalse are not used by the visible code; the jumps below query
// the successors from BI directly.
409 BasicBlock *ifTrue = BI.getSuccessor (0);
410 BasicBlock *ifFalse = BI.getSuccessor (1); // this is really unobvious
412 // simplest thing I can think of: compare condition with zero,
413 // followed by jump-if-equal to ifFalse, and jump-if-nonequal to
415 unsigned int condReg = getReg (BI.getCondition ());
416 BuildMI (BB, X86::CMPri8, 2, X86::EFLAGS).addReg (condReg).addZImm (0);
// Nonzero condition -> successor 0; zero condition -> successor 1.
417 BuildMI (BB, X86::JNE, 1).addPCDisp (BI.getSuccessor (0));
418 BuildMI (BB, X86::JE, 1).addPCDisp (BI.getSuccessor (1));
420 else // unconditional branch
422 BuildMI (BB, X86::JMP, 1).addPCDisp (BI.getSuccessor (0));
427 /// visitSimpleBinary - Implement simple binary operators for integral types...
428 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or,
// ... and 4 for Xor (see the visitXor wrapper and the last table row below).
// Emits one three-address register-register instruction whose destination is
// the register assigned to B.
// NOTE(review): the statements guarded by the two FIXME ifs are missing from
// this listing; presumably they bail out via visitInstruction.
431 void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
432 if (B.getType() == Type::BoolTy) // FIXME: Handle bools for logicals
435 unsigned Class = getClass(B.getType());
436 if (Class > 2) // FIXME: Handle longs
// Rows are indexed by OperatorClass, columns by type class (8/16/32 bit); the
// fourth column is a 0 placeholder.
439 static const unsigned OpcodeTab[][4] = {
440 // Arithmetic operators
441 { X86::ADDrr8, X86::ADDrr16, X86::ADDrr32, 0 }, // ADD
442 { X86::SUBrr8, X86::SUBrr16, X86::SUBrr32, 0 }, // SUB
// Bitwise operators
445 { X86::ANDrr8, X86::ANDrr16, X86::ANDrr32, 0 }, // AND
446 { X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR
447 { X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR
450 unsigned Opcode = OpcodeTab[OperatorClass][Class];
// Operand registers are fetched before the destination register.
451 unsigned Op0r = getReg(B.getOperand(0));
452 unsigned Op1r = getReg(B.getOperand(1));
453 BuildMI(BB, Opcode, 2, getReg(B)).addReg(Op0r).addReg(Op1r);
456 /// visitMul - Multiplies are not simple binary operators because they must deal
457 /// with the EAX register explicitly.
// Sequence emitted: move operand 0 into AL/AX/EAX, multiply by operand 1
// (marking AH/DX/EDX as clobbered), then move AL/AX/EAX into the result
// register.
// NOTE(review): the statement guarded by the FIXME if is missing from this
// listing.
459 void ISel::visitMul(BinaryOperator &I) {
460 unsigned Class = getClass(I.getType());
461 if (Class > 2) // FIXME: Handle longs
// All four tables are indexed by type class (0 = 8 bit, 1 = 16 bit, 2 = 32 bit).
464 static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
465 static const unsigned Clobbers[] ={ X86::AH , X86::DX , X86::EDX };
466 static const unsigned MulOpcode[]={ X86::MULrr8, X86::MULrr16, X86::MULrr32 };
467 static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
469 unsigned Reg = Regs[Class];
470 unsigned Clobber = Clobbers[Class];
471 unsigned Op0Reg = getReg(I.getOperand(0));
472 unsigned Op1Reg = getReg(I.getOperand(1));
474 // Put the first operand into one of the A registers...
475 BuildMI(BB, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
477 // Emit the appropriate multiply instruction...
// The A register is both read and written (UseAndDef).
478 BuildMI(BB, MulOpcode[Class], 3)
479 .addReg(Reg, UseAndDef).addReg(Op1Reg).addClobber(Clobber);
481 // Put the result into the destination register...
482 BuildMI(BB, MovOpcode[Class], 1, getReg(I)).addReg(Reg);
486 /// visitDivRem - Handle division and remainder instructions... these
487 /// instruction both require the same instructions to be generated, they just
488 /// select the result from a different register. Note that both of these
489 /// instructions work differently for signed and unsigned operands.
// Sequence emitted: move operand 0 into AL/AX/EAX, fill AH/DX/EDX by sign
// extension or by zeroing, divide by operand 1, then copy the quotient
// (AL/AX/EAX) for Div or the remainder (AH/DX/EDX) for Rem into the result.
// NOTE(review): the if/else around the extension and zeroing calls and the
// statement guarded by the FIXME if are missing from this listing; the two
// calls are alternatives, not a sequence.
491 void ISel::visitDivRem(BinaryOperator &I) {
492 unsigned Class = getClass(I.getType());
493 if (Class > 2) // FIXME: Handle longs
// Tables indexed by type class (0 = 8 bit, 1 = 16 bit, 2 = 32 bit).
496 static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
497 static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
498 static const unsigned ExtOpcode[]={ X86::CBW , X86::CWD , X86::CDQ };
499 static const unsigned ClrOpcode[]={ X86::XORrr8, X86::XORrr16, X86::XORrr32 };
500 static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
// Row 0 is unsigned, row 1 is signed; the row is selected with the bool
// isSigned below.
502 static const unsigned DivOpcode[][4] = {
503 { X86::DIVrr8 , X86::DIVrr16 , X86::DIVrr32 , 0 }, // Unsigned division
504 { X86::IDIVrr8, X86::IDIVrr16, X86::IDIVrr32, 0 }, // Signed division
507 bool isSigned = I.getType()->isSigned();
508 unsigned Reg = Regs[Class];
509 unsigned ExtReg = ExtRegs[Class];
510 unsigned Op0Reg = getReg(I.getOperand(0));
511 unsigned Op1Reg = getReg(I.getOperand(1));
513 // Put the first operand into one of the A registers...
514 BuildMI(BB, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
517 // Emit a sign extension instruction...
518 BuildMI(BB, ExtOpcode[Class], 1, ExtReg).addReg(Reg);
520 // If unsigned, emit a zeroing instruction... (reg = xor reg, reg)
521 BuildMI(BB, ClrOpcode[Class], 2, ExtReg).addReg(ExtReg).addReg(ExtReg);
524 // Emit the appropriate divide or remainder instruction...
// Both the A register and the extension register are read and written.
525 BuildMI(BB, DivOpcode[isSigned][Class], 2)
526 .addReg(Reg, UseAndDef).addReg(ExtReg, UseAndDef).addReg(Op1Reg);
528 // Figure out which register we want to pick the result out of...
529 unsigned DestReg = (I.getOpcode() == Instruction::Div) ? Reg : ExtReg;
531 // Put the result into the destination register...
532 BuildMI(BB, MovOpcode[Class], 1, getReg(I)).addReg(DestReg);
536 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here
537 /// for constant immediate shift values, and for constant immediate
538 /// shift values equal to 1. Even the general case is sort of special,
539 /// because the shift amount has to be in CL, not just any old register.
// Both opcode tables below are indexed by isLeftShift*2 + isOperandSigned:
// row 0 = SHR (logical right), row 1 = SAR (arithmetic right), rows 2 and 3 =
// SHL. A signed operand must therefore yield isOperandSigned == true so that a
// right shift of a signed value selects SAR and an unsigned one selects SHR.
// NOTE(review): the else line and braces separating the constant and
// non-constant paths are missing from this listing.
541 void ISel::visitShiftInst (ShiftInst &I) {
542 unsigned Op0r = getReg (I.getOperand(0));
543 unsigned DestReg = getReg(I);
544 bool isLeftShift = I.getOpcode() == Instruction::Shl;
// Fixed: this was initialized from isUnsigned(), which inverted the row
// selection for right shifts.
545 bool isOperandSigned = I.getType()->isSigned();
546 unsigned OperandClass = getClass(I.getType());
548 if (OperandClass > 2)
549 visitInstruction(I); // Can't handle longs yet!
551 if (ConstantUInt *CUI = dyn_cast <ConstantUInt> (I.getOperand (1)))
553 // The shift amount is constant, guaranteed to be a ubyte. Get its value.
554 assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
555 unsigned char shAmt = CUI->getValue();
// Shift-by-immediate opcodes; columns are the type class (8/16/32 bit), with
// a 0 placeholder in the fourth column.
557 static const unsigned ConstantOperand[][4] = {
558 { X86::SHRir8, X86::SHRir16, X86::SHRir32, 0 }, // SHR
559 { X86::SARir8, X86::SARir16, X86::SARir32, 0 }, // SAR
560 { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SHL
561 { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SAL = SHL
564 const unsigned *OpTab = // Figure out the operand table to use
565 ConstantOperand[isLeftShift*2+isOperandSigned];
567 // Emit: <insn> reg, shamt (shift-by-immediate opcode "ir" form.)
568 BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addZImm(shAmt);
572 // The shift amount is non-constant.
574 // In fact, you can only shift with a variable shift amount if
575 // that amount is already in the CL register, so we have to put it
579 // Emit: move cl, shiftAmount (put the shift amount in CL.)
580 BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(getReg(I.getOperand(1)));
582 // Shift-by-CL opcodes, laid out exactly like ConstantOperand above.
583 static const unsigned NonConstantOperand[][4] = {
584 { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32, 0 }, // SHR
585 { X86::SARrr8, X86::SARrr16, X86::SARrr32, 0 }, // SAR
586 { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SHL
587 { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SAL = SHL
590 const unsigned *OpTab = // Figure out the operand table to use
591 NonConstantOperand[isLeftShift*2+isOperandSigned];
593 BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addReg(X86::CL);
598 /// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
// Emits one memory-to-register move whose destination is the register
// assigned to the load and whose address is the register holding operand 0.
// NOTE(review): the rest of the comment above and the statement guarded by
// the FIXME if are missing from this listing.
601 void ISel::visitLoadInst(LoadInst &I) {
602 unsigned Class = getClass(I.getType());
603 if (Class > 2) // FIXME: Handle longs and others...
// Indexed by type class (8/16/32 bit).
606 static const unsigned Opcode[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
608 unsigned AddressReg = getReg(I.getOperand(0));
// addDirectMem is not defined in this file (presumably X86InstrBuilder.h); it
// is given the instruction under construction and the address register.
609 addDirectMem(BuildMI(BB, Opcode[Class], 4, getReg(I)), AddressReg);
613 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
// Emits one register-to-memory move: operand 0 is the value being stored (its
// type picks the opcode) and operand 1 is the address.
// NOTE(review): the rest of the comment above and the statement guarded by
// the FIXME if are missing from this listing.
616 void ISel::visitStoreInst(StoreInst &I) {
617 unsigned Class = getClass(I.getOperand(0)->getType());
618 if (Class > 2) // FIXME: Handle longs and others...
// Indexed by type class (8/16/32 bit).
621 static const unsigned Opcode[] = { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32 };
623 unsigned ValReg = getReg(I.getOperand(0));
624 unsigned AddressReg = getReg(I.getOperand(1));
// The memory operands are added first, then the value register; the operand
// count is written as 1+4 to match.
625 addDirectMem(BuildMI(BB, Opcode[Class], 1+4), AddressReg).addReg(ValReg);
629 /// visitPHINode - Turn an LLVM PHI node into an X86 PHI node...
// Creates an X86::PHI instruction defining the register assigned to PN, then
// appends one (register, block) operand pair per incoming value.
631 void ISel::visitPHINode(PHINode &PN) {
632 MachineInstr *MI = BuildMI(BB, X86::PHI, PN.getNumOperands(), getReg(PN));
634 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
635 // FIXME: This will put constants after the PHI nodes in the block, which
636 // is invalid. They should be put inline into the PHI node eventually.
// getReg may emit a constant-materializing instruction into BB here, which is
// what the FIXME above refers to.
638 MI->addRegOperand(getReg(PN.getIncomingValue(i)));
639 MI->addPCDispOperand(PN.getIncomingBlock(i));
644 /// createSimpleX86InstructionSelector - This pass converts an LLVM function
645 /// into a machine code representation in a very simple peep-hole fashion. The
646 /// generated code sucks but the implementation is nice and simple.
648 Pass *createSimpleX86InstructionSelector(TargetMachine &TM) {