1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "x86-isel"
17 #include "X86InstrBuilder.h"
18 #include "X86ISelLowering.h"
19 #include "X86RegisterInfo.h"
20 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/GlobalValue.h"
23 #include "llvm/Instructions.h"
24 #include "llvm/Intrinsics.h"
25 #include "llvm/Support/CFG.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/SSARegMap.h"
31 #include "llvm/CodeGen/SelectionDAGISel.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/ADT/Statistic.h"
42 //===----------------------------------------------------------------------===//
43 // Pattern Matcher Implementation
44 //===----------------------------------------------------------------------===//
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
/// SDOperand's instead of register numbers for the leaves of the matched
/// address tree.
struct X86ISelAddressMode {
  struct { // This is really a union, discriminated by BaseType!
  bool isRIPRel; // RIP relative?
  unsigned Align;    // CP alignment.
  // Default state: register base (no register picked yet), scale 1, no
  // index register, zero displacement, and no GV/CP/ES/JT component.
  // JT uses -1 as its "unset" sentinel (see getAddressOperands).
    : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
      GV(0), CP(0), ES(0), JT(-1), Align(0) {
// Pass statistics (shown with -stats): how many FP_REG_KILL instructions
// were inserted, and how many loads were moved below a TokenFactor by
// InstructionSelectPreprocess.
NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
NumLoadMoved("x86-codegen", "Number of loads moved below TokenFactor");
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
  /// ContainsFPCode - Every instruction we select that uses or defines a FP
  /// register should set this to true.
  /// FastISel - Enable fast(er) instruction selection.
  /// TM - Keep a reference to X86TargetMachine.
  X86TargetMachine &TM;
  /// X86Lowering - This object fully describes how to lower LLVM code to an
  /// X86-specific SelectionDAG.
  X86TargetLowering X86Lowering;
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;
  /// GlobalBaseReg - keeps track of the virtual register mapped onto global
  /// base register; 0 until getGlobalBaseReg() lazily materializes it.
  unsigned GlobalBaseReg;
  X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
    : SelectionDAGISel(X86Lowering),
      ContainsFPCode(false), FastISel(fast), TM(tm),
      X86Lowering(*TM.getTargetLowering()),
      Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
  virtual bool runOnFunction(Function &Fn) {
    // Make sure we re-emit a set of the global base reg if necessary
    return SelectionDAGISel::runOnFunction(Fn);
  virtual const char *getPassName() const {
    return "X86 DAG->DAG Instruction Selection";
  /// InstructionSelectBasicBlock - This callback is invoked by
  /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
  virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
  virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
  /// CanBeFoldedBy - Returns true when it is safe for U to fold N without
  /// creating a cycle (see the out-of-line definition for the reasoning).
  virtual bool CanBeFoldedBy(SDNode *N, SDNode *U);
  // Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
  SDNode *Select(SDOperand N);
  bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true);
  bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                  SDOperand &Index, SDOperand &Disp);
  bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                     SDOperand &Index, SDOperand &Disp);
  bool SelectScalarSSELoad(SDOperand N, SDOperand &Base, SDOperand &Scale,
                           SDOperand &Index, SDOperand &Disp,
                           SDOperand &InChain, SDOperand &OutChain);
  bool TryFoldLoad(SDOperand P, SDOperand N,
                   SDOperand &Base, SDOperand &Scale,
                   SDOperand &Index, SDOperand &Disp);
  void InstructionSelectPreprocess(SelectionDAG &DAG);
  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                            std::vector<SDOperand> &OutOps,
  void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
  /// getAddressOperands - Convert a matched X86ISelAddressMode into the four
  /// operands (Base, Scale, Index, Disp) X86 memory instructions expect.
  inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                 SDOperand &Scale, SDOperand &Index,
    Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
      CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
    Scale = getI8Imm(AM.Scale);
    // These are 32-bit even in 64-bit mode since RIP relative offset
    // is only 32 bits wide.
      Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
    else if (AM.JT != -1)
      Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      Disp = getI32Imm(AM.Disp);
  /// getI8Imm - Return a target constant with the specified value, of type
  /// i8.
  inline SDOperand getI8Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i8);
  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDOperand getI16Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i16);
  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDOperand getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();
/// findFlagUse - Return a user of N's flag result (its last value), or null
/// if no such user exists.  The flag result is assumed to be the node's
/// final value (NumValues-1).
static SDNode *findFlagUse(SDNode *N) {
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDOperand Op = User->getOperand(i);
      // A user whose operand refers to result #FlagResNo consumes the flag.
      if (Op.ResNo == FlagResNo)
/// findNonImmUse - Recursively walk Use's operand DAG looking for Def,
/// setting 'found' when Def is reached.  The walk is pruned at nodes already
/// in Visited, and (apparently) at nodes whose id places them after Def in
/// the topological order — TODO confirm; the opening of the guard is not
/// visible here.
static void findNonImmUse(SDNode* Use, SDNode* Def, SDNode *Ignore, bool &found,
                          std::set<SDNode *> &Visited) {
    Use->getNodeId() > Def->getNodeId() ||
      !Visited.insert(Use).second)
  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
      findNonImmUse(N, Def, Ignore, found, Visited);
/// isNonImmUse - Return true if Def can be reached from Use through some
/// operand chain other than a direct operand edge (Def or Ignore as an
/// immediate operand of Use does not count).  Used to detect folding cycles.
static inline bool isNonImmUse(SDNode* Use, SDNode* Def, SDNode *Ignore=NULL) {
  std::set<SDNode *> Visited;
  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    if (N != Def && N != Ignore) {
      findNonImmUse(N, Def, Ignore, found, Visited);
  if (!found && Ignore) {
    // We must be checking for reachability between Def and a flag use. Go down
    // recursively if Use also produces a flag.
    MVT::ValueType VT = Use->getValueType(Use->getNumValues()-1);
    if (VT == MVT::Flag && !Use->use_empty()) {
      SDNode *FU = findFlagUse(Use);
        return !isNonImmUse(FU, Def, Use);
/// CanBeFoldedBy - Return true if it is safe to fold node N into U as a
/// memory operand.  Folding is unsafe if U can reach N through a second
/// path, since the fold would create a cycle.
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) {
  // If U use can somehow reach N through another path then U can't fold N or
  // it will create a cycle. e.g. In the following diagram, U can reach N
  // through X. If N is folded into U, then X becomes both a predecessor and
  // a successor of U.
  if (!FastISel && !isNonImmUse(U, N)) {
    // If U produces a flag, then it gets (even more) interesting. Since it
    // would have been "glued" together with its flag use, we need to check if
    // the flag use can reach N as well.
    // If FU (flag use) indirectly reach N (the load), and U fold N (call it
    // NU), then TF is a predecessor of FU and a successor of NU. But since
    // NU and FU are flagged together, this effectively creates a cycle.
    MVT::ValueType VT = U->getValueType(U->getNumValues()-1);
    if (VT == MVT::Flag && !U->use_empty()) {
      SDNode *FU = findFlagUse(U);
        return !isNonImmUse(FU, N, U);
/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                                 SDOperand Store, SDOperand TF) {
  std::vector<SDOperand> Ops;
  // Rebuild the TokenFactor's operand list, substituting the load's own
  // chain input in place of the load itself.
  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
    if (Load.Val == TF.Val->getOperand(i).Val)
      Ops.push_back(Load.Val->getOperand(0));
      Ops.push_back(TF.Val->getOperand(i));
  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // The load now chains from the TokenFactor, and the store chains from the
  // load's chain result.
  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
                         Store.getOperand(2), Store.getOperand(3));
/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
/// selector to pick more load-modify-store instructions. This is a common
/// case:
///
///     [TokenFactor]        [Op]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
/// a form where the load hangs below the TokenFactor (MoveBelowTokenFactor).
void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I) {
    // Only stores whose chain operand is a TokenFactor are of interest.
    if (I->getOpcode() != ISD::STORE)
    SDOperand Chain = I->getOperand(0);
    if (Chain.Val->getOpcode() != ISD::TokenFactor)
    SDOperand N1 = I->getOperand(1);
    SDOperand N2 = I->getOperand(2);
    // FP and vector values are not handled by this transformation.
    if (MVT::isFloatingPoint(N1.getValueType()) ||
        MVT::isVector(N1.getValueType()) ||
    unsigned Opcode = N1.Val->getOpcode();
    // Binary op case: the load may be either operand.
      SDOperand N10 = N1.getOperand(0);
      SDOperand N11 = N1.getOperand(1);
      if (ISD::isNON_EXTLoad(N10.Val))
      else if (ISD::isNON_EXTLoad(N11.Val)) {
      // Read-modify-write only if the load feeds off the store's chain
      // TokenFactor, has one use, loads from the store's address (N2), and
      // the loaded type matches the stored value's type.
      RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
              (N10.getOperand(1) == N2) &&
              (N10.Val->getValueType(0) == N1.getValueType());
    // Unary op case: (store (op (load)) addr).
      SDOperand N10 = N1.getOperand(0);
      if (ISD::isNON_EXTLoad(N10.Val))
        RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
                (N10.getOperand(1) == N2) &&
                (N10.Val->getValueType(0) == N1.getValueType());
      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
442 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
443 /// when it has created a SelectionDAG for us to codegen.
444 void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
446 MachineFunction::iterator FirstMBB = BB;
449 InstructionSelectPreprocess(DAG);
451 // Codegen the basic block.
453 DEBUG(std::cerr << "===== Instruction selection begins:\n");
456 DAG.setRoot(SelectRoot(DAG.getRoot()));
458 DEBUG(std::cerr << "===== Instruction selection ends:\n");
461 DAG.RemoveDeadNodes();
463 // Emit machine code to BB.
464 ScheduleAndEmitDAG(DAG);
466 // If we are emitting FP stack code, scan the basic block to determine if this
467 // block defines any FP values. If so, put an FP_REG_KILL instruction before
468 // the terminator of the block.
469 if (!Subtarget->hasSSE2()) {
470 // Note that FP stack instructions *are* used in SSE code when returning
471 // values, but these are not live out of the basic block, so we don't need
472 // an FP_REG_KILL in this case either.
473 bool ContainsFPCode = false;
475 // Scan all of the machine instructions in these MBBs, checking for FP
477 MachineFunction::iterator MBBI = FirstMBB;
479 for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
480 !ContainsFPCode && I != E; ++I) {
481 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
482 if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
483 MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
484 RegMap->getRegClass(I->getOperand(0).getReg()) ==
485 X86::RFPRegisterClass) {
486 ContainsFPCode = true;
491 } while (!ContainsFPCode && &*(MBBI++) != BB);
493 // Check PHI nodes in successor blocks. These PHI's will be lowered to have
494 // a copy of the input value in this block.
495 if (!ContainsFPCode) {
496 // Final check, check LLVM BB's that are successors to the LLVM BB
497 // corresponding to BB for FP PHI nodes.
498 const BasicBlock *LLVMBB = BB->getBasicBlock();
500 for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
501 !ContainsFPCode && SI != E; ++SI) {
502 for (BasicBlock::const_iterator II = SI->begin();
503 (PN = dyn_cast<PHINode>(II)); ++II) {
504 if (PN->getType()->isFloatingPoint()) {
505 ContainsFPCode = true;
512 // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
513 if (ContainsFPCode) {
514 BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  // Cygwin requires a call to __main before main's own code runs.
  if (Subtarget->isTargetCygwin())
    BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main");
  // Switch the FPU to 64-bit precision mode for better compatibility and speed.
  // Spill the current control word to a 2-byte, 2-aligned stack slot.
  int CWFrameIdx = MFI->CreateStackObject(2, 2);
  addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
  // Set the high part to be 64-bit precision.
  addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
                    CWFrameIdx, 1).addImm(2);
  // Reload the modified control word now.
  addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
/// EmitFunctionEntryCode - Hook run once per function; used here to emit the
/// special entry code for externally-visible 'main'.
void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
546 /// MatchAddress - Add the specified node to the specified addressing mode,
547 /// returning true if it cannot be done. This just pattern matches for the
549 bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
551 // RIP relative addressing: %rip + 32-bit displacement!
553 if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
554 int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
555 if (isInt32(AM.Disp + Val)) {
563 int id = N.Val->getNodeId();
564 bool Available = isSelected(id);
566 switch (N.getOpcode()) {
568 case ISD::Constant: {
569 int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
570 if (isInt32(AM.Disp + Val)) {
577 case X86ISD::Wrapper:
578 // If value is available in a register both base and index components have
579 // been picked, we can't fit the result available in the register in the
580 // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
582 // Can't fit GV or CP in addressing mode for X86-64 medium or large code
583 // model since the displacement field is 32-bit. Ok for small code model.
585 // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
586 // relative addressing mode.
587 if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
588 (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
589 bool isRIP = Subtarget->is64Bit();
590 if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
591 AM.BaseType == X86ISelAddressMode::FrameIndexBase))
593 if (ConstantPoolSDNode *CP =
594 dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
596 AM.CP = CP->getConstVal();
597 AM.Align = CP->getAlignment();
598 AM.Disp += CP->getOffset();
603 } else if (GlobalAddressSDNode *G =
604 dyn_cast<GlobalAddressSDNode>(N.getOperand(0))) {
606 AM.GV = G->getGlobal();
607 AM.Disp += G->getOffset();
612 } else if (isRoot && isRIP) {
613 if (ExternalSymbolSDNode *S =
614 dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
615 AM.ES = S->getSymbol();
618 } else if (JumpTableSDNode *J =
619 dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
620 AM.JT = J->getIndex();
628 case ISD::FrameIndex:
629 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
630 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
631 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
637 if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
638 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
639 unsigned Val = CN->getValue();
640 if (Val == 1 || Val == 2 || Val == 3) {
642 SDOperand ShVal = N.Val->getOperand(0);
644 // Okay, we know that we have a scale by now. However, if the scaled
645 // value is an add of something and a constant, we can fold the
646 // constant into the disp field here.
647 if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
648 isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
649 AM.IndexReg = ShVal.Val->getOperand(0);
650 ConstantSDNode *AddVal =
651 cast<ConstantSDNode>(ShVal.Val->getOperand(1));
652 uint64_t Disp = AM.Disp + AddVal->getValue() << Val;
666 // X*[3,5,9] -> X+X*[2,4,8]
668 AM.BaseType == X86ISelAddressMode::RegBase &&
669 AM.Base.Reg.Val == 0 &&
670 AM.IndexReg.Val == 0)
671 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
672 if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
673 AM.Scale = unsigned(CN->getValue())-1;
675 SDOperand MulVal = N.Val->getOperand(0);
678 // Okay, we know that we have a scale by now. However, if the scaled
679 // value is an add of something and a constant, we can fold the
680 // constant into the disp field here.
681 if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
682 isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
683 Reg = MulVal.Val->getOperand(0);
684 ConstantSDNode *AddVal =
685 cast<ConstantSDNode>(MulVal.Val->getOperand(1));
686 uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
690 Reg = N.Val->getOperand(0);
692 Reg = N.Val->getOperand(0);
695 AM.IndexReg = AM.Base.Reg = Reg;
702 X86ISelAddressMode Backup = AM;
703 if (!MatchAddress(N.Val->getOperand(0), AM, false) &&
704 !MatchAddress(N.Val->getOperand(1), AM, false))
707 if (!MatchAddress(N.Val->getOperand(1), AM, false) &&
708 !MatchAddress(N.Val->getOperand(0), AM, false))
717 X86ISelAddressMode Backup = AM;
718 // Look for (x << c1) | c2 where (c2 < c1)
719 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0));
720 if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) {
721 if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
722 AM.Disp = CN->getValue();
727 CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1));
728 if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) {
729 if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
730 AM.Disp = CN->getValue();
740 // Is the base register already occupied?
741 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
742 // If so, check to see if the scale index register is set.
743 if (AM.IndexReg.Val == 0) {
749 // Otherwise, we cannot select it.
753 // Default, generate it as a register.
754 AM.BaseType = X86ISelAddressMode::RegBase;
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                                 SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  // MatchAddress returns true on failure.
  if (MatchAddress(N, AM))
  MVT::ValueType VT = N.getValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.Val)
      // Register 0 acts as the "no base register" placeholder.
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  if (!AM.IndexReg.Val)
    AM.IndexReg = CurDAG->getRegister(0, VT);
  getAddressOperands(AM, Base, Scale, Index, Disp);
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base,
                                          SDOperand &Scale, SDOperand &Index,
                                          SDOperand &Disp, SDOperand &InChain,
                                          SDOperand &OutChain) {
  // (scalar_to_vector (load addr)) — top elements are implicitly undef.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(InChain.Val)) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      SDOperand LoadAddr = LD->getBasePtr();
      // SelectAddr returns false when it cannot match the address.
      if (!SelectAddr(LoadAddr, Base, Scale, Index, Disp))
      OutChain = LD->getChain();
  // TODO: Also handle the case where we explicitly require zeros in the top
  // elements. This is a vector shuffle from the zero vector.
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
/// A Complexity score is accumulated over the matched components; only a
/// score above 2 is considered worth an LEA.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
                                    SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  if (MatchAddress(N, AM))
  MVT::ValueType VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    AM.IndexReg = CurDAG->getRegister(0, VT);
  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
  else if (AM.Scale > 1)
  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
  if (Complexity > 2) {
    getAddressOperands(AM, Base, Scale, Index, Disp);
/// TryFoldLoad - Return true if load N can be folded as a memory operand of
/// the instruction selected for P: N must be a plain (non-extending) load
/// that CanBeFoldedBy approves, and its address must match an addressing
/// mode (returned through Base/Scale/Index/Disp).
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (ISD::isNON_EXTLoad(N.Val) &&
      CanBeFoldedBy(N.Val, P.Val))
    return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
/// isRegister0 - Return true if Op is a RegisterSDNode carrying register
/// number 0 (the "no register" placeholder used by SelectAddr).
static bool isRegister0(SDOperand Op) {
  if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op))
    return (R->getReg() == 0);
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.  The result
/// is cached in GlobalBaseReg so the materialization code is emitted at
/// most once per function.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = BB->getParent()->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    SSARegMap *RegMap = BB->getParent()->getSSARegMap();
    // FIXME: when we get to LP64, we will need to create the appropriate
    // type of register here.
    GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
    // Push the PC then pop it into GlobalBaseReg — the classic i386 idiom
    // for materializing the current instruction address.
    BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
    BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
/// FindCallStartFromCall - Walk up the token-chain operands from a call node
/// until the matching CALLSEQ_START node is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).Val);
/// Select - Select an SDOperand into target nodes.  Handles the cases the
/// tblgen'd matcher cannot (GlobalBaseReg, ADD-of-wrapper, MULH*, div/rem,
/// i8 truncates) and falls back to SelectCode for everything else.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opcode = Node->getOpcode();
  DEBUG(std::cerr << std::string(Indent, ' '));
  DEBUG(std::cerr << "Selecting: ");
  DEBUG(Node->dump(CurDAG));
  DEBUG(std::cerr << "\n");
  // Target nodes below X86ISD::FIRST_NUMBER but past BUILTIN_OP_END have
  // already been selected.
  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
    DEBUG(std::cerr << std::string(Indent-2, ' '));
    DEBUG(std::cerr << "== ");
    DEBUG(Node->dump(CurDAG));
    DEBUG(std::cerr << "\n");
    return NULL; // Already selected.
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();
    // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
    // code and is matched first so to prevent it from being turned into
    // an LEA of the same address.
    // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
    MVT::ValueType PtrVT = TLI.getPointerTy();
    SDOperand N0 = N.getOperand(0);
    SDOperand N1 = N.getOperand(1);
    if (N.Val->getValueType(0) == PtrVT &&
        N0.getOpcode() == X86ISD::Wrapper &&
        N1.getOpcode() == ISD::Constant) {
      unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
      // TODO: handle ExternalSymbolSDNode.
      // Fold the constant offset directly into the GA/CP target operand.
      if (GlobalAddressSDNode *G =
            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                           G->getOffset() + Offset);
      } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                          CP->getOffset()+Offset);
      if (Subtarget->is64Bit()) {
        SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                            CurDAG->getRegister(0, PtrVT), C };
        return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
      return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
    // Other cases are handled by auto-generated code.
    // MULHU/MULHS: choose unsigned or signed multiply; Opc is the register
    // form, MOpc the memory (folded-load) form.
    if (Opcode == ISD::MULHU)
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
    // The multiply implicitly reads/writes these fixed registers.
    unsigned LoReg, HiReg;
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);
    bool foldedLoad = false;
    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
    // MULHU and MULHS are commutative, so also try folding the other operand.
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
        N0 = Node->getOperand(1);
        N1 = Node->getOperand(0);
      Chain = N1.getOperand(0);
      AddToISelQueue(Chain);
      Chain = CurDAG->getEntryNode();
    SDOperand InFlag(0, 0);
    // Copy the first operand into the fixed low register.
    Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
    InFlag = Chain.getValue(1);
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      Chain = SDOperand(CNode, 0);
      InFlag = SDOperand(CNode, 1);
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    // MULH* wants the high half, which the multiply left in HiReg.
    SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
    ReplaceUses(N.getValue(0), Result);
      ReplaceUses(N1.getValue(1), Result.getValue(1));
      DEBUG(std::cerr << std::string(Indent-2, ' '));
      DEBUG(std::cerr << "=> ");
      DEBUG(Result.Val->dump(CurDAG));
      DEBUG(std::cerr << "\n");
    // Integer divide / remainder selection.
    bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
    bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
    // Per-type implicit registers and the clear / sign-extend opcodes used
    // to set up the high half of the dividend.
    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
      default: assert(0 && "Unsupported VT!");
        LoReg = X86::AL; HiReg = X86::AH;
        ClrOpcode = X86::MOV8r0;
        SExtOpcode = X86::CBW;
        LoReg = X86::AX; HiReg = X86::DX;
        ClrOpcode = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode = X86::MOV64r0;
        SExtOpcode = X86::CQO;
    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);
    bool foldedLoad = false;
    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      Chain = N1.getOperand(0);
      AddToISelQueue(Chain);
      Chain = CurDAG->getEntryNode();
    SDOperand InFlag(0, 0);
    Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
    InFlag = Chain.getValue(1);
      // Sign extend the low part into the high part.
        SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
      // Zero out the high part, effectively zero extending the input.
      SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
      Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
      InFlag = Chain.getValue(1);
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      Chain = SDOperand(CNode, 0);
      InFlag = SDOperand(CNode, 1);
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    // Quotient comes back in LoReg, remainder in HiReg.
    SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
    ReplaceUses(N.getValue(0), Result);
      ReplaceUses(N1.getValue(1), Result.getValue(1));
      DEBUG(std::cerr << std::string(Indent-2, ' '));
      DEBUG(std::cerr << "=> ");
      DEBUG(Result.Val->dump(CurDAG));
      DEBUG(std::cerr << "\n");
  case ISD::TRUNCATE: {
    // On x86-32, truncation to i8 must go through a register with a spillable
    // 8-bit subreg (the _ pseudo classes), then an explicit TRUNC pseudo.
    if (!Subtarget->is64Bit() && NVT == MVT::i8) {
      switch (Node->getOperand(0).getValueType()) {
      default: assert(0 && "Unknown truncate!");
        Opc = X86::MOV16to16_;
        Opc2 = X86::TRUNC_16_to8;
        Opc = X86::MOV32to32_;
        Opc2 = X86::TRUNC_32_to8;
      AddToISelQueue(Node->getOperand(0));
        SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
      SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
        DEBUG(std::cerr << std::string(Indent-2, ' '));
        DEBUG(std::cerr << "=> ");
        DEBUG(ResNode->dump(CurDAG));
        DEBUG(std::cerr << "\n");
  // Everything else: defer to the tblgen-generated matcher.
  SDNode *ResNode = SelectCode(N);
  DEBUG(std::cerr << std::string(Indent-2, ' '));
  DEBUG(std::cerr << "=> ");
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(std::cerr << "\n");
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm memory constraints.  On success the four address operands
/// (Base, Scale, Index, Disp) are pushed onto OutOps; returns true on
/// failure.
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
  SDOperand Op0, Op1, Op2, Op3;
  switch (ConstraintCode) {
  case 'o': // offsetable        ??
  case 'v': // not offsetable    ??
  default: return true;
    if (!SelectAddr(Op, Op0, Op1, Op2, Op3))
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  // Queue the address operands so they themselves get selected.
  AddToISelQueue(Op0);
  AddToISelQueue(Op1);
  AddToISelQueue(Op2);
  AddToISelQueue(Op3);
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.  'Fast' enables the
/// fast(er), less thorough selection mode (see FastISel member).
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
  return new X86DAGToDAGISel(TM, Fast);