1 //===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
3 // This file defines a simple peephole instruction selector for the x86 platform
5 //===----------------------------------------------------------------------===//
8 #include "X86InstrInfo.h"
9 #include "X86InstrBuilder.h"
10 #include "llvm/Function.h"
11 #include "llvm/iTerminators.h"
12 #include "llvm/iOperators.h"
13 #include "llvm/iOther.h"
14 #include "llvm/iPHINode.h"
15 #include "llvm/iMemory.h"
16 #include "llvm/Type.h"
17 #include "llvm/Constants.h"
18 #include "llvm/Pass.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/Target/TargetMachine.h"
22 #include "llvm/Support/InstVisitor.h"
23 #include "llvm/Target/MRegisterInfo.h"
26 using namespace MOTy; // Get Use, Def, UseAndDef
// ISel - Function pass that visits every instruction of an LLVM function
// (through InstVisitor) and emits fixed X86 machine code for it.
// NOTE(review): parts of this definition are not visible in this view (for
// example the declarations of TM and CurReg that the constructor initializes)
// - consult the complete file before changing it.
29 struct ISel : public FunctionPass, InstVisitor<ISel> {
31 MachineFunction *F; // The function we are compiling into
32 MachineBasicBlock *BB; // The current MBB we are compiling
35 std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs
// No function or block is selected initially; CurReg starts at the first
// virtual register number.
37 ISel(TargetMachine &tm)
38 : TM(tm), F(0), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {}
40 /// runOnFunction - Top level implementation of instruction selection for
41 /// the entire function.
// Obtains the MachineFunction for Fn from MachineFunction::construct and keeps
// it in F.  Always returns false.
43 bool runOnFunction(Function &Fn) {
44 F = &MachineFunction::construct(&Fn, TM);
48 return false; // We never modify the LLVM itself.
51 /// visitBasicBlock - This method is called when we are visiting a new basic
52 /// block. This simply creates a new MachineBasicBlock to emit code into
53 /// and adds it to the current MachineFunction. Subsequent visit* for
54 /// instructions will be invoked for all instructions in the basic block.
56 void visitBasicBlock(BasicBlock &LLVM_BB) {
// BB is the block handed to every BuildMI call made by the visit methods.
57 BB = new MachineBasicBlock(&LLVM_BB);
58 // FIXME: Use the auto-insert form when it's available
59 F->getBasicBlockList().push_back(BB);
62 // Visitation methods for various instructions. These methods simply emit
63 // fixed X86 code for each instruction.
65 void visitReturnInst(ReturnInst &RI);
66 void visitBranchInst(BranchInst &BI);
68 // Arithmetic operators
// The integer handed to visitSimpleBinary is its operator class:
// 0 = Add, 1 = Sub, 2 = And, 3 = Or, 4 = Xor.
69 void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
70 void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
71 void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
72 void visitMul(BinaryOperator &B);
// Division and remainder share a single implementation.
74 void visitDiv(BinaryOperator &B) { visitDivRem(B); }
75 void visitRem(BinaryOperator &B) { visitDivRem(B); }
76 void visitDivRem(BinaryOperator &B);
79 void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); }
80 void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); }
81 void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); }
83 // Binary comparison operators
84 void visitSetCondInst(SetCondInst &I);
86 // Memory Instructions
87 void visitLoadInst(LoadInst &I);
88 void visitStoreInst(StoreInst &I);
91 void visitShiftInst(ShiftInst &I);
92 void visitPHINode(PHINode &I);
// Reports an instruction that cannot be selected on std::cerr.  The other
// visit methods also call this directly for operand types they do not support.
94 void visitInstruction(Instruction &I) {
95 std::cerr << "Cannot instruction select: " << I;
100 /// copyConstantToRegister - Output the instructions required to put the
101 /// specified constant into the specified register.
103 void copyConstantToRegister(Constant *C, unsigned Reg);
105 /// getReg - This method turns an LLVM value into a register number. This
106 /// is guaranteed to produce the same register number for a particular value
107 /// every time it is queried.
109 unsigned getReg(Value &V) { return getReg(&V); } // Allow references
110 unsigned getReg(Value *V) {
// operator[] creates the map entry on first use; Reg is a reference, so
// the entry itself can be updated through it.
111 unsigned &Reg = RegMap[V];
116 // Add the mapping of regnumber => reg class to MachineFunction
118 TM.getRegisterInfo()->getRegClassForType(V->getType()));
121 // If this operand is a constant, emit the code to copy the constant into
122 // the register here...
124 if (Constant *C = dyn_cast<Constant>(V))
125 copyConstantToRegister(C, Reg);
132 /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
// The enumerators appear in the order of the class numbers documented in
// getClass: bytes #0, shorts #1, ints and pointers #2, longs #3, float #4,
// double #5.  The opcode tables in this file are indexed by these values.
136 cByte, cShort, cInt, cLong, cFloat, cDouble
139 /// getClass - Turn a primitive type into a "class" number which is based on the
140 /// size of the type, and whether or not it is floating point.
// Signedness does not matter here: the signed and unsigned variant of a type
// map to the same class, and pointers share class cInt.
142 static inline TypeClass getClass(const Type *Ty) {
143 switch (Ty->getPrimitiveID()) {
144 case Type::SByteTyID:
145 case Type::UByteTyID: return cByte; // Byte operands are class #0
146 case Type::ShortTyID:
147 case Type::UShortTyID: return cShort; // Short operands are class #1
150 case Type::PointerTyID: return cInt; // Int's and pointers are class #2
153 case Type::ULongTyID: return cLong; // Longs are class #3
154 case Type::FloatTyID: return cFloat; // Float is class #4
155 case Type::DoubleTyID: return cDouble; // Doubles are class #5
// Any type without a case above is a caller error.  If assertions are
// compiled out, execution reaches the return below and yields cByte.
157 assert(0 && "Invalid type to getClass!");
158 return cByte; // not reached
163 /// copyConstantToRegister - Output the instructions required to put the
164 /// specified constant into the specified register.
166 void ISel::copyConstantToRegister(Constant *C, unsigned R) {
167 assert (!isa<ConstantExpr>(C) && "Constant expressions not yet handled!\n");
169 if (C->getType()->isIntegral()) {
170 unsigned Class = getClass(C->getType());
171 assert(Class != 3 && "Type not handled yet!");
173 static const unsigned IntegralOpcodeTab[] = {
174 X86::MOVir8, X86::MOVir16, X86::MOVir32
177 if (C->getType()->isSigned()) {
178 ConstantSInt *CSI = cast<ConstantSInt>(C);
179 BuildMI(BB, IntegralOpcodeTab[Class], 1, R).addSImm(CSI->getValue());
181 ConstantUInt *CUI = cast<ConstantUInt>(C);
182 BuildMI(BB, IntegralOpcodeTab[Class], 1, R).addZImm(CUI->getValue());
185 assert(0 && "Type not handled yet!");
190 /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
191 /// register, then move it to wherever the result should be.
192 /// We handle FP setcc instructions by pushing them, doing a
193 /// compare-and-pop-twice, and then copying the concodes to the main
194 /// processor's concodes (I didn't make this up, it's in the Intel manual)
197 ISel::visitSetCondInst (SetCondInst & I)
199 // The arguments are already supposed to be of the same type.
200 Value *var1 = I.getOperand (0);
201 Value *var2 = I.getOperand (1);
202 unsigned reg1 = getReg (var1);
203 unsigned reg2 = getReg (var2);
204 unsigned resultReg = getReg (I);
// The operand size selects the FLD / CMP variant below (presumably a size in
// bytes, given the FLDr4 / FLDr8 opcode names - confirm).
205 unsigned comparisonWidth = var1->getType ()->getPrimitiveSize ();
206 unsigned unsignedComparison = var1->getType ()->isUnsigned ();
207 unsigned resultWidth = I.getType ()->getPrimitiveSize ();
208 bool fpComparison = var1->getType ()->isFloatingPoint ();
211 // Push the variables on the stack with fldl opcodes.
212 // FIXME: assuming var1, var2 are in memory, if not, spill to
214 switch (comparisonWidth)
217 BuildMI (BB, X86::FLDr4, 1, X86::NoReg).addReg (reg1);
220 BuildMI (BB, X86::FLDr8, 1, X86::NoReg).addReg (reg1);
223 visitInstruction (I);
// Same width dispatch again, this time loading the second operand.
226 switch (comparisonWidth)
229 BuildMI (BB, X86::FLDr4, 1, X86::NoReg).addReg (reg2);
232 BuildMI (BB, X86::FLDr8, 1, X86::NoReg).addReg (reg2);
235 visitInstruction (I);
238 // (Non-trapping) compare and pop twice.
239 BuildMI (BB, X86::FUCOMPP, 0);
240 // Move fp status word (concodes) to ax.
241 BuildMI (BB, X86::FNSTSWr8, 1, X86::AX);
242 // Load real concodes from ax.
243 BuildMI (BB, X86::SAHF, 1).addReg(X86::AH);
246 { // integer comparison
247 // Emit: cmp <var1>, <var2> (do the comparison). We can
248 // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
250 switch (comparisonWidth)
253 BuildMI (BB, X86::CMPrr8, 2).addReg (reg1).addReg (reg2);
256 BuildMI (BB, X86::CMPrr16, 2).addReg (reg1).addReg (reg2);
259 BuildMI (BB, X86::CMPrr32, 2).addReg (reg1).addReg (reg2);
263 visitInstruction (I);
// NOTE(review): the signed/unsigned column is chosen only from isUnsigned() of
// the operand type, so a floating point comparison presumably takes the signed
// column - confirm that this matches the flags produced by FUCOMPP + SAHF.
267 // Emit setOp instruction (extract concode; clobbers ax),
268 // using the following mapping:
269 // LLVM -> X86 signed X86 unsigned
271 // seteq -> sete sete
272 // setne -> setne setne
273 // setlt -> setl setb
274 // setgt -> setg seta
275 // setle -> setle setbe
276 // setge -> setge setae
277 switch (I.getOpcode ())
279 case Instruction::SetEQ:
280 BuildMI (BB, X86::SETE, 0, X86::AL);
282 case Instruction::SetGE:
283 if (unsignedComparison)
284 BuildMI (BB, X86::SETAE, 0, X86::AL);
286 BuildMI (BB, X86::SETGE, 0, X86::AL);
288 case Instruction::SetGT:
289 if (unsignedComparison)
290 BuildMI (BB, X86::SETA, 0, X86::AL);
292 BuildMI (BB, X86::SETG, 0, X86::AL);
294 case Instruction::SetLE:
295 if (unsignedComparison)
296 BuildMI (BB, X86::SETBE, 0, X86::AL);
298 BuildMI (BB, X86::SETLE, 0, X86::AL);
300 case Instruction::SetLT:
301 if (unsignedComparison)
302 BuildMI (BB, X86::SETB, 0, X86::AL);
304 BuildMI (BB, X86::SETL, 0, X86::AL);
306 case Instruction::SetNE:
307 BuildMI (BB, X86::SETNE, 0, X86::AL);
310 visitInstruction (I);
// The setcc above wrote its byte result to AL; the result register receives it
// either through a plain byte move or zero-extended to 16 or 32 bits.
313 // Put it in the result using a move.
317 BuildMI (BB, X86::MOVrr8, 1, resultReg).addReg (X86::AL);
320 BuildMI (BB, X86::MOVZXr16r8, 1, resultReg).addReg (X86::AL);
323 BuildMI (BB, X86::MOVZXr32r8, 1, resultReg).addReg (X86::AL);
327 visitInstruction (I);
333 /// 'ret' instruction - Here we are interested in meeting the x86 ABI. As such,
334 /// we have the following possibilities:
336 /// ret void: No return value, simply emit a 'ret' instruction
337 /// ret sbyte, ubyte : Extend value into EAX and return
338 /// ret short, ushort: Extend value into EAX and return
339 /// ret int, uint : Move value into EAX and return
340 /// ret pointer : Move value into EAX and return
341 /// ret long, ulong : Move value into EAX/EDX and return
342 /// ret float/double : Top of FP stack
344 void ISel::visitReturnInst (ReturnInst &I) {
345 if (I.getNumOperands() == 0) {
346 // Emit a 'ret' instruction
347 BuildMI(BB, X86::RET, 0);
// A value is being returned: fetch its register, its type class and its
// signedness.  The signedness picks between the zero-extending (MOVZX) and
// sign-extending (MOVSX) moves below.
351 unsigned val = getReg(I.getOperand(0));
352 unsigned Class = getClass(I.getOperand(0)->getType());
353 bool isUnsigned = I.getOperand(0)->getType()->isUnsigned();
356 // ret sbyte, ubyte: Extend value into EAX and return
358 BuildMI (BB, X86::MOVZXr32r8, 1, X86::EAX).addReg (val);
360 BuildMI (BB, X86::MOVSXr32r8, 1, X86::EAX).addReg (val);
363 // ret short, ushort: Extend value into EAX and return
365 BuildMI (BB, X86::MOVZXr32r16, 1, X86::EAX).addReg (val);
367 BuildMI (BB, X86::MOVSXr32r16, 1, X86::EAX).addReg (val);
370 // ret int, uint, ptr: Move value into EAX and return
372 BuildMI(BB, X86::MOVrr32, 1, X86::EAX).addReg(val);
375 // ret float/double: top of FP stack
377 case cFloat: // Floats
378 BuildMI(BB, X86::FLDr4, 1).addReg(val);
380 case cDouble: // Doubles
381 BuildMI(BB, X86::FLDr8, 1).addReg(val);
// No instruction for the long case is visible in this view.
384 // ret long: use EAX(least significant 32 bits)/EDX (most
385 // significant 32)...uh, I think so Brain, but how do i call
386 // up the two parts of the value from inside this mouse
392 // Emit a 'ret' instruction
393 BuildMI(BB, X86::RET, 0);
396 /// visitBranchInst - Handle conditional and unconditional branches here. Note
397 /// that since code layout is frozen at this point, that if we are trying to
398 /// jump to a block that is the immediate successor of the current block, we can
399 /// just make a fall-through. (but we don't currently).
402 ISel::visitBranchInst (BranchInst & BI)
404 if (BI.isConditional ())
406 BasicBlock *ifTrue = BI.getSuccessor (0);
407 BasicBlock *ifFalse = BI.getSuccessor (1); // this is really unobvious
409 // simplest thing I can think of: compare condition with zero,
410 // followed by jump-if-equal to ifFalse, and jump-if-nonequal to
412 unsigned int condReg = getReg (BI.getCondition ());
413 BuildMI (BB, X86::CMPri8, 2).addReg (condReg).addZImm (0);
414 BuildMI (BB, X86::JNE, 1).addPCDisp (BI.getSuccessor (0));
415 BuildMI (BB, X86::JE, 1).addPCDisp (BI.getSuccessor (1));
417 else // unconditional branch
419 BuildMI (BB, X86::JMP, 1).addPCDisp (BI.getSuccessor (0));
424 /// visitSimpleBinary - Implement simple binary operators for integral types...
425 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or,
// ... and 4 for Xor, matching the row order of OpcodeTab below and the values
// passed by visitAdd / visitSub / visitAnd / visitOr / visitXor.
428 void ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
429 if (B.getType() == Type::BoolTy) // FIXME: Handle bools for logicals
432 unsigned Class = getClass(B.getType());
433 if (Class > 2) // FIXME: Handle longs
// OpcodeTab[OperatorClass][Class]: one row per operator, one column per type
// class (byte, short, int).  The fourth column holds 0 in every row.
436 static const unsigned OpcodeTab[][4] = {
437 // Arithmetic operators
438 { X86::ADDrr8, X86::ADDrr16, X86::ADDrr32, 0 }, // ADD
439 { X86::SUBrr8, X86::SUBrr16, X86::SUBrr32, 0 }, // SUB
442 { X86::ANDrr8, X86::ANDrr16, X86::ANDrr32, 0 }, // AND
443 { X86:: ORrr8, X86:: ORrr16, X86:: ORrr32, 0 }, // OR
444 { X86::XORrr8, X86::XORrr16, X86::XORrr32, 0 }, // XOR
447 unsigned Opcode = OpcodeTab[OperatorClass][Class];
// The operand registers are requested first, so any code getReg emits to
// materialize a constant operand precedes the operation itself.
448 unsigned Op0r = getReg(B.getOperand(0));
449 unsigned Op1r = getReg(B.getOperand(1));
450 BuildMI(BB, Opcode, 2, getReg(B)).addReg(Op0r).addReg(Op1r);
453 /// visitMul - Multiplies are not simple binary operators because they must deal
454 /// with the EAX register explicitly.
456 void ISel::visitMul(BinaryOperator &I) {
457 unsigned Class = getClass(I.getType());
458 if (Class > 2) // FIXME: Handle longs
// All four tables are indexed by the type class (byte, short, int).
// Regs is the A register of that width, Clobbers the register that is attached
// to the multiply as a clobber operand.
461 static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
462 static const unsigned Clobbers[] ={ X86::AH , X86::DX , X86::EDX };
463 static const unsigned MulOpcode[]={ X86::MULrr8, X86::MULrr16, X86::MULrr32 };
464 static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
466 unsigned Reg = Regs[Class];
467 unsigned Clobber = Clobbers[Class];
468 unsigned Op0Reg = getReg(I.getOperand(0));
469 unsigned Op1Reg = getReg(I.getOperand(1));
471 // Put the first operand into one of the A registers...
472 BuildMI(BB, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
// The A register is both read and written by the multiply (UseAndDef).
474 // Emit the appropriate multiply instruction...
475 BuildMI(BB, MulOpcode[Class], 3)
476 .addReg(Reg, UseAndDef).addReg(Op1Reg).addClobber(Clobber);
478 // Put the result into the destination register...
479 BuildMI(BB, MovOpcode[Class], 1, getReg(I)).addReg(Reg);
483 /// visitDivRem - Handle division and remainder instructions... these
484 /// instruction both require the same instructions to be generated, they just
485 /// select the result from a different register. Note that both of these
486 /// instructions work differently for signed and unsigned operands.
488 void ISel::visitDivRem(BinaryOperator &I) {
489 unsigned Class = getClass(I.getType());
490 if (Class > 2) // FIXME: Handle longs
// The one-dimensional tables are indexed by the type class (byte, short, int).
// Regs is the A register of that width and ExtRegs the register paired with it
// for the divide; ExtOpcode sign-extends into ExtRegs, ClrOpcode zeroes it.
493 static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
494 static const unsigned MovOpcode[]={ X86::MOVrr8, X86::MOVrr16, X86::MOVrr32 };
495 static const unsigned ExtOpcode[]={ X86::CBW , X86::CWD , X86::CDQ };
496 static const unsigned ClrOpcode[]={ X86::XORrr8, X86::XORrr16, X86::XORrr32 };
497 static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
// DivOpcode[isSigned][Class]: row 0 is unsigned, row 1 is signed division.
499 static const unsigned DivOpcode[][4] = {
500 { X86::DIVrr8 , X86::DIVrr16 , X86::DIVrr32 , 0 }, // Unsigned division
501 { X86::IDIVrr8, X86::IDIVrr16, X86::IDIVrr32, 0 }, // Signed division
504 bool isSigned = I.getType()->isSigned();
505 unsigned Reg = Regs[Class];
506 unsigned ExtReg = ExtRegs[Class];
507 unsigned Op0Reg = getReg(I.getOperand(0));
508 unsigned Op1Reg = getReg(I.getOperand(1));
510 // Put the first operand into one of the A registers...
511 BuildMI(BB, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
514 // Emit a sign extension instruction...
515 BuildMI(BB, ExtOpcode[Class], 1, ExtReg).addReg(Reg);
517 // If unsigned, emit a zeroing instruction... (reg = xor reg, reg)
518 BuildMI(BB, ClrOpcode[Class], 2, ExtReg).addReg(ExtReg).addReg(ExtReg);
// Both Reg and ExtReg are read and written by the divide (UseAndDef).
521 // Emit the appropriate divide or remainder instruction...
522 BuildMI(BB, DivOpcode[isSigned][Class], 2)
523 .addReg(Reg, UseAndDef).addReg(ExtReg, UseAndDef).addReg(Op1Reg);
// A Div reads its result from Reg, anything else (Rem) from ExtReg.
525 // Figure out which register we want to pick the result out of...
526 unsigned DestReg = (I.getOpcode() == Instruction::Div) ? Reg : ExtReg;
528 // Put the result into the destination register...
529 BuildMI(BB, MovOpcode[Class], 1, getReg(I)).addReg(DestReg);
533 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here
534 /// for constant immediate shift values, and for constant immediate
535 /// shift values equal to 1. Even the general case is sort of special,
536 /// because the shift amount has to be in CL, not just any old register.
538 void ISel::visitShiftInst (ShiftInst &I) {
539 unsigned Op0r = getReg (I.getOperand(0));
540 unsigned DestReg = getReg(I);
541 bool isLeftShift = I.getOpcode() == Instruction::Shl;
542 bool isOperandSigned = I.getType()->isUnsigned();
543 unsigned OperandClass = getClass(I.getType());
545 if (OperandClass > 2)
546 visitInstruction(I); // Can't handle longs yet!
548 if (ConstantUInt *CUI = dyn_cast <ConstantUInt> (I.getOperand (1)))
550 // The shift amount is constant, guaranteed to be a ubyte. Get its value.
551 assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
552 unsigned char shAmt = CUI->getValue();
554 static const unsigned ConstantOperand[][4] = {
555 { X86::SHRir8, X86::SHRir16, X86::SHRir32, 0 }, // SHR
556 { X86::SARir8, X86::SARir16, X86::SARir32, 0 }, // SAR
557 { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SHL
558 { X86::SHLir8, X86::SHLir16, X86::SHLir32, 0 }, // SAL = SHL
561 const unsigned *OpTab = // Figure out the operand table to use
562 ConstantOperand[isLeftShift*2+isOperandSigned];
564 // Emit: <insn> reg, shamt (shift-by-immediate opcode "ir" form.)
565 BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addZImm(shAmt);
569 // The shift amount is non-constant.
571 // In fact, you can only shift with a variable shift amount if
572 // that amount is already in the CL register, so we have to put it
576 // Emit: move cl, shiftAmount (put the shift amount in CL.)
577 BuildMI(BB, X86::MOVrr8, 1, X86::CL).addReg(getReg(I.getOperand(1)));
579 // This is a shift right (SHR).
580 static const unsigned NonConstantOperand[][4] = {
581 { X86::SHRrr8, X86::SHRrr16, X86::SHRrr32, 0 }, // SHR
582 { X86::SARrr8, X86::SARrr16, X86::SARrr32, 0 }, // SAR
583 { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SHL
584 { X86::SHLrr8, X86::SHLrr16, X86::SHLrr32, 0 }, // SAL = SHL
587 const unsigned *OpTab = // Figure out the operand table to use
588 NonConstantOperand[isLeftShift*2+isOperandSigned];
590 BuildMI(BB, OpTab[OperandClass], 2, DestReg).addReg(Op0r).addReg(X86::CL);
595 /// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
598 void ISel::visitLoadInst(LoadInst &I) {
// The class of the loaded value selects the width of the move.
599 unsigned Class = getClass(I.getType());
600 if (Class > 2) // FIXME: Handle longs and others...
// Memory-to-register moves, indexed by type class (byte, short, int).
603 static const unsigned Opcode[] = { X86::MOVmr8, X86::MOVmr16, X86::MOVmr32 };
// Operand 0 of the load is the address.  addDirectMem presumably attaches
// AddressReg as the memory reference of the instruction (which would explain
// the operand count of 4) - confirm against X86InstrBuilder.h.
605 unsigned AddressReg = getReg(I.getOperand(0));
606 addDirectMem(BuildMI(BB, Opcode[Class], 4, getReg(I)), AddressReg);
610 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
613 void ISel::visitStoreInst(StoreInst &I) {
// The class of the stored value (operand 0) selects the width of the move.
614 unsigned Class = getClass(I.getOperand(0)->getType());
615 if (Class > 2) // FIXME: Handle longs and others...
// Register-to-memory moves, indexed by type class (byte, short, int).
618 static const unsigned Opcode[] = { X86::MOVrm8, X86::MOVrm16, X86::MOVrm32 };
// Operand 0 is the value to store, operand 1 the address.  The memory
// reference is added first and the value register last; 1+4 presumably counts
// the value plus a four-operand memory reference - confirm.
620 unsigned ValReg = getReg(I.getOperand(0));
621 unsigned AddressReg = getReg(I.getOperand(1));
622 addDirectMem(BuildMI(BB, Opcode[Class], 1+4), AddressReg).addReg(ValReg);
626 /// visitPHINode - Turn an LLVM PHI node into an X86 PHI node...
628 void ISel::visitPHINode(PHINode &PN) {
629 MachineInstr *MI = BuildMI(BB, X86::PHI, PN.getNumOperands(), getReg(PN));
631 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
632 // FIXME: This will put constants after the PHI nodes in the block, which
633 // is invalid. They should be put inline into the PHI node eventually.
635 MI->addRegOperand(getReg(PN.getIncomingValue(i)));
636 MI->addPCDispOperand(PN.getIncomingBlock(i));
641 /// createSimpleX86InstructionSelector - This pass converts an LLVM function
642 /// into a machine code representation in a very simple peep-hole fashion. The
643 /// generated code sucks but the implementation is nice and simple.
645 Pass *createSimpleX86InstructionSelector(TargetMachine &TM) {