1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
3 // The LLVM Compiler Infrastructure
3 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to an X86 dag.
13 //===----------------------------------------------------------------------===//
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86ISelLowering.h"
19 #include "llvm/GlobalValue.h"
20 #include "llvm/Instructions.h"
21 #include "llvm/Support/CFG.h"
22 #include "llvm/CodeGen/MachineConstantPool.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/SSARegMap.h"
27 #include "llvm/CodeGen/SelectionDAGISel.h"
28 #include "llvm/Target/TargetMachine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/ADT/Statistic.h"
33 //===----------------------------------------------------------------------===//
34 // Pattern Matcher Implementation
35 //===----------------------------------------------------------------------===//
38 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
39 /// SDOperand's instead of register numbers for the leaves of the matched tree.
41 struct X86ISelAddressMode {
48 struct { // This is really a union, discriminated by BaseType!
59 : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0) {
66 NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
68 //===--------------------------------------------------------------------===//
69 /// ISel - X86 specific code to select X86 machine instructions for
70 /// SelectionDAG operations.
72 class X86DAGToDAGISel : public SelectionDAGISel {
73 /// ContainsFPCode - Every instruction we select that uses or defines an FP
74 /// register should set this to true.
77 /// X86Lowering - This object fully describes how to lower LLVM code to an
78 /// X86-specific SelectionDAG.
79 X86TargetLowering X86Lowering;
81 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
82 /// make the right decision when generating code for different targets.
83 const X86Subtarget *Subtarget;
85 X86DAGToDAGISel(TargetMachine &TM)
86 : SelectionDAGISel(X86Lowering), X86Lowering(TM) {
87 Subtarget = &TM.getSubtarget<X86Subtarget>();
90 virtual const char *getPassName() const {
91 return "X86 DAG->DAG Instruction Selection";
94 /// InstructionSelectBasicBlock - This callback is invoked by
95 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
96 virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
98 // Include the pieces autogenerated from the target description.
99 #include "X86GenDAGISel.inc"
102 SDOperand Select(SDOperand N);
104 bool MatchAddress(SDOperand N, X86ISelAddressMode &AM);
105 bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
106 SDOperand &Index, SDOperand &Disp);
107 bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
108 SDOperand &Index, SDOperand &Disp);
109 bool TryFoldLoad(SDOperand N, SDOperand &Base, SDOperand &Scale,
110 SDOperand &Index, SDOperand &Disp);
112 inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
113 SDOperand &Scale, SDOperand &Index,
115 Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
116 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
117 Scale = getI8Imm(AM.Scale);
119 Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
120 : getI32Imm(AM.Disp);
123 /// getI8Imm - Return a target constant with the specified value, of type i8.
125 inline SDOperand getI8Imm(unsigned Imm) {
126 return CurDAG->getTargetConstant(Imm, MVT::i8);
129 /// getI16Imm - Return a target constant with the specified value, of type i16.
131 inline SDOperand getI16Imm(unsigned Imm) {
132 return CurDAG->getTargetConstant(Imm, MVT::i16);
135 /// getI32Imm - Return a target constant with the specified value, of type i32.
137 inline SDOperand getI32Imm(unsigned Imm) {
138 return CurDAG->getTargetConstant(Imm, MVT::i32);
143 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
144 /// when it has created a SelectionDAG for us to codegen.
145 void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
147 MachineFunction::iterator FirstMBB = BB;
149 // Codegen the basic block.
150 DAG.setRoot(Select(DAG.getRoot()));
152 DAG.RemoveDeadNodes();
154 // Emit machine code to BB.
155 ScheduleAndEmitDAG(DAG);
157 // If we are emitting FP stack code, scan the basic block to determine if this
158 // block defines any FP values. If so, put an FP_REG_KILL instruction before
159 // the terminator of the block.
160 if (X86Vector < SSE2) {
161 // Note that FP stack instructions *are* used in SSE code when returning
162 // values, but these are not live out of the basic block, so we don't need
163 // an FP_REG_KILL in this case either.
164 bool ContainsFPCode = false;
166 // Scan all of the machine instructions in these MBBs, checking for FP register definitions.
168 MachineFunction::iterator MBBI = FirstMBB;
170 for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
171 !ContainsFPCode && I != E; ++I) {
172 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
173 if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
174 MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
175 RegMap->getRegClass(I->getOperand(0).getReg()) ==
176 X86::RFPRegisterClass) {
177 ContainsFPCode = true;
182 } while (!ContainsFPCode && &*(MBBI++) != BB);
184 // Check PHI nodes in successor blocks. These PHI's will be lowered to have
185 // a copy of the input value in this block.
186 if (!ContainsFPCode) {
187 // Final check, check LLVM BB's that are successors to the LLVM BB
188 // corresponding to BB for FP PHI nodes.
189 const BasicBlock *LLVMBB = BB->getBasicBlock();
191 for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
192 !ContainsFPCode && SI != E; ++SI) {
193 for (BasicBlock::const_iterator II = SI->begin();
194 (PN = dyn_cast<PHINode>(II)); ++II) {
195 if (PN->getType()->isFloatingPoint()) {
196 ContainsFPCode = true;
203 // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
204 if (ContainsFPCode) {
205 BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
211 /// FIXME: copied from X86ISelPattern.cpp
212 /// MatchAddress - Add the specified node to the specified addressing mode,
213 /// returning true if it cannot be done. This just pattern matches for the addressing mode.
215 bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM) {
216 switch (N.getOpcode()) {
218 case ISD::FrameIndex:
219 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
220 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
221 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
226 case ISD::ConstantPool:
227 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
228 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N)) {
229 AM.BaseType = X86ISelAddressMode::ConstantPoolBase;
230 AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32);
236 case ISD::GlobalAddress:
237 case ISD::TargetGlobalAddress:
239 AM.GV = cast<GlobalAddressSDNode>(N)->getGlobal();
245 AM.Disp += cast<ConstantSDNode>(N)->getValue();
249 if (AM.IndexReg.Val == 0 && AM.Scale == 1)
250 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
251 unsigned Val = CN->getValue();
252 if (Val == 1 || Val == 2 || Val == 3) {
254 SDOperand ShVal = N.Val->getOperand(0);
256 // Okay, we know that we have a scale by now. However, if the scaled
257 // value is an add of something and a constant, we can fold the
258 // constant into the disp field here.
259 if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
260 isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
261 AM.IndexReg = ShVal.Val->getOperand(0);
262 ConstantSDNode *AddVal =
263 cast<ConstantSDNode>(ShVal.Val->getOperand(1));
264 AM.Disp += AddVal->getValue() << Val;
274 // X*[3,5,9] -> X+X*[2,4,8]
275 if (AM.IndexReg.Val == 0 && AM.BaseType == X86ISelAddressMode::RegBase &&
276 AM.Base.Reg.Val == 0)
277 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
278 if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
279 AM.Scale = unsigned(CN->getValue())-1;
281 SDOperand MulVal = N.Val->getOperand(0);
284 // Okay, we know that we have a scale by now. However, if the scaled
285 // value is an add of something and a constant, we can fold the
286 // constant into the disp field here.
287 if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
288 isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
289 Reg = MulVal.Val->getOperand(0);
290 ConstantSDNode *AddVal =
291 cast<ConstantSDNode>(MulVal.Val->getOperand(1));
292 AM.Disp += AddVal->getValue() * CN->getValue();
294 Reg = N.Val->getOperand(0);
297 AM.IndexReg = AM.Base.Reg = Reg;
303 X86ISelAddressMode Backup = AM;
304 if (!MatchAddress(N.Val->getOperand(0), AM) &&
305 !MatchAddress(N.Val->getOperand(1), AM))
308 if (!MatchAddress(N.Val->getOperand(1), AM) &&
309 !MatchAddress(N.Val->getOperand(0), AM))
316 // Is the base register already occupied?
317 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
318 // If so, check to see if the scale index register is set.
319 if (AM.IndexReg.Val == 0) {
325 // Otherwise, we cannot select it.
329 // Default, generate it as a register.
330 AM.BaseType = X86ISelAddressMode::RegBase;
335 /// SelectAddr - returns true if it is able to pattern match an addressing mode.
336 /// It returns the operands which make up the maximal addressing mode it can
337 /// match by reference.
338 bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
339 SDOperand &Index, SDOperand &Disp) {
340 X86ISelAddressMode AM;
341 if (!MatchAddress(N, AM)) {
342 if (AM.BaseType == X86ISelAddressMode::RegBase) {
344 AM.Base.Reg = Select(AM.Base.Reg);
346 AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
349 AM.IndexReg = Select(AM.IndexReg);
351 AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
353 getAddressOperands(AM, Base, Scale, Index, Disp);
359 bool X86DAGToDAGISel::TryFoldLoad(SDOperand N, SDOperand &Base,
360 SDOperand &Scale, SDOperand &Index,
362 if (N.getOpcode() == ISD::LOAD && N.hasOneUse() &&
363 CodeGenMap.count(N.getValue(1)) == 0)
364 return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
368 static bool isRegister0(SDOperand Op) {
369 if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op))
370 return (R->getReg() == 0);
374 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
375 /// mode it matches can be cost effectively emitted as an LEA instruction.
376 /// For X86, it always is unless it's just a (Reg + const).
377 bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
379 SDOperand &Index, SDOperand &Disp) {
380 X86ISelAddressMode AM;
381 if (!MatchAddress(N, AM)) {
382 bool SelectBase = false;
383 bool SelectIndex = false;
385 if (AM.BaseType == X86ISelAddressMode::RegBase) {
386 if (AM.Base.Reg.Val) {
390 AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
394 if (AM.IndexReg.Val) {
397 AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
401 unsigned Complexity = 0;
408 else if (AM.Disp > 1)
415 AM.Base.Reg = Select(AM.Base.Reg);
417 AM.IndexReg = Select(AM.IndexReg);
419 getAddressOperands(AM, Base, Scale, Index, Disp);
425 SDOperand X86DAGToDAGISel::Select(SDOperand N) {
426 SDNode *Node = N.Val;
427 MVT::ValueType NVT = Node->getValueType(0);
429 unsigned Opcode = Node->getOpcode();
431 if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER)
432 return N; // Already selected.
438 if (Opcode == ISD::MULHU)
440 default: assert(0 && "Unsupported VT!");
441 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
442 case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
443 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
447 default: assert(0 && "Unsupported VT!");
448 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
449 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
450 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
453 unsigned LoReg, HiReg;
455 default: assert(0 && "Unsupported VT!");
456 case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
457 case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
458 case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
461 SDOperand N0 = Node->getOperand(0);
462 SDOperand N1 = Node->getOperand(1);
464 bool foldedLoad = false;
465 SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
466 foldedLoad = TryFoldLoad(N1, Tmp0, Tmp1, Tmp2, Tmp3);
467 // MULHU and MULHS are commutative
469 foldedLoad = TryFoldLoad(N0, Tmp0, Tmp1, Tmp2, Tmp3);
471 N0 = Node->getOperand(1);
472 N1 = Node->getOperand(0);
476 SDOperand Chain = foldedLoad ? Select(N1.getOperand(0))
477 : CurDAG->getEntryNode();
480 Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
482 InFlag = Chain.getValue(1);
485 Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1,
486 Tmp2, Tmp3, Chain, InFlag);
487 InFlag = Chain.getValue(1);
489 InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, Select(N1), InFlag);
492 SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
493 CodeGenMap[N.getValue(0)] = Result;
495 CodeGenMap[N1.getValue(1)] = Result.getValue(1);
503 bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
504 bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
507 default: assert(0 && "Unsupported VT!");
508 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
509 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
510 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
514 default: assert(0 && "Unsupported VT!");
515 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
516 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
517 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
520 unsigned LoReg, HiReg;
521 unsigned ClrOpcode, SExtOpcode;
523 default: assert(0 && "Unsupported VT!");
525 LoReg = X86::AL; HiReg = X86::AH;
526 ClrOpcode = X86::MOV8ri;
527 SExtOpcode = X86::CBW;
530 LoReg = X86::AX; HiReg = X86::DX;
531 ClrOpcode = X86::MOV16ri;
532 SExtOpcode = X86::CWD;
535 LoReg = X86::EAX; HiReg = X86::EDX;
536 ClrOpcode = X86::MOV32ri;
537 SExtOpcode = X86::CDQ;
541 SDOperand N0 = Node->getOperand(0);
542 SDOperand N1 = Node->getOperand(1);
544 bool foldedLoad = false;
545 SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
546 foldedLoad = TryFoldLoad(N1, Tmp0, Tmp1, Tmp2, Tmp3);
547 SDOperand Chain = foldedLoad ? Select(N1.getOperand(0))
548 : CurDAG->getEntryNode();
551 Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
553 InFlag = Chain.getValue(1);
556 // Sign extend the low part into the high part.
557 InFlag = CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag);
559 // Zero out the high part, effectively zero extending the input.
561 CurDAG->getTargetNode(ClrOpcode, NVT,
562 CurDAG->getTargetConstant(0, NVT));
563 Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
565 InFlag = Chain.getValue(1);
569 Chain = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1,
570 Tmp2, Tmp3, Chain, InFlag);
571 InFlag = Chain.getValue(1);
573 InFlag = CurDAG->getTargetNode(Opc, MVT::Flag, Select(N1), InFlag);
576 SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
578 CodeGenMap[N.getValue(0)] = Result;
580 CodeGenMap[N1.getValue(1)] = Result.getValue(1);
584 case ISD::TRUNCATE: {
587 switch (Node->getOperand(0).getValueType()) {
588 default: assert(0 && "Unknown truncate!");
589 case MVT::i16: Reg = X86::AX; Opc = X86::MOV16rr; VT = MVT::i16; break;
590 case MVT::i32: Reg = X86::EAX; Opc = X86::MOV32rr; VT = MVT::i32; break;
592 SDOperand Tmp0 = Select(Node->getOperand(0));
593 SDOperand Tmp1 = CurDAG->getTargetNode(Opc, VT, Tmp0);
594 SDOperand InFlag = SDOperand(0,0);
595 SDOperand Result = CurDAG->getCopyToReg(CurDAG->getEntryNode(),
596 Reg, Tmp1, InFlag).getValue(1);
597 SDOperand Chain = Result.getValue(0);
598 InFlag = Result.getValue(1);
601 default: assert(0 && "Unknown truncate!");
602 case MVT::i8: Reg = X86::AL; Opc = X86::MOV8rr; VT = MVT::i8; break;
603 case MVT::i16: Reg = X86::AX; Opc = X86::MOV16rr; VT = MVT::i16; break;
606 Result = CurDAG->getCopyFromReg(Chain,
608 if (N.Val->hasOneUse())
609 return CurDAG->SelectNodeTo(N.Val, Opc, VT, Result);
611 return CodeGenMap[N] = CurDAG->getTargetNode(Opc, VT, Result);
615 case X86ISD::FP_TO_INT16_IN_MEM:
616 case X86ISD::FP_TO_INT32_IN_MEM:
617 case X86ISD::FP_TO_INT64_IN_MEM: {
618 assert(N.getOperand(1).getValueType() == MVT::f64);
620 // Change the floating point control register to use "round towards zero"
621 // mode when truncating to an integer value.
622 MachineFunction &MF = CurDAG->getMachineFunction();
623 int CWFI = MF.getFrameInfo()->CreateStackObject(2, 2);
624 SDOperand CWSlot = CurDAG->getFrameIndex(CWFI, MVT::i32);
625 SDOperand Base, Scale, Index, Disp;
626 (void)SelectAddr(CWSlot, Base, Scale, Index, Disp);
627 SDOperand Chain = N.getOperand(0);
629 // Save the control word.
630 Chain = CurDAG->getTargetNode(X86::FNSTCW16m, MVT::Other,
631 Base, Scale, Index, Disp, Chain);
633 // Load the old value of the control word.
635 CurDAG->getTargetNode(X86::MOV16rm, MVT::i16, MVT::Other,
636 Base, Scale, Index, Disp, Chain);
637 Chain = OldCW.getValue(1);
639 // Set the high part to be round to zero...
640 Chain = CurDAG->getTargetNode(X86::MOV16mi, MVT::Other,
641 Base, Scale, Index, Disp,
642 CurDAG->getConstant(0xC7F, MVT::i16),
645 // Reload the modified control word now...
646 Chain = CurDAG->getTargetNode(X86::FLDCW16m, MVT::Other,
647 Base, Scale, Index, Disp, Chain);
649 // Restore the memory image of control word to original value
650 Chain = CurDAG->getTargetNode(X86::MOV16mr, MVT::Other,
651 Base, Scale, Index, Disp, OldCW, Chain);
654 case X86ISD::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
655 case X86ISD::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
656 case X86ISD::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
659 SDOperand N1 = Select(N.getOperand(1));
660 SDOperand Base2, Scale2, Index2, Disp2;
661 (void)SelectAddr(N.getOperand(2), Base2, Scale2, Index2, Disp2);
662 Chain = CurDAG->getTargetNode(Opc, MVT::Other,
663 Base2, Scale2, Index2, Disp2, N1, Chain);
665 // Reload the original control word now.
667 Chain = CurDAG->getTargetNode(X86::FLDCW16m, MVT::Other,
668 Base, Scale, Index, Disp, Chain);
673 return SelectCode(N);
676 /// createX86ISelDag - This pass converts a legalized DAG into an
677 /// X86-specific DAG, ready for instruction scheduling.
679 FunctionPass *llvm::createX86ISelDag(TargetMachine &TM) {
680 return new X86DAGToDAGISel(TM);