//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "isel"
+#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
+#include <deque>
#include <iostream>
+#include <queue>
#include <set>
using namespace llvm;
struct X86ISelAddressMode {
enum {
RegBase,
- FrameIndexBase,
+ FrameIndexBase
} BaseType;
struct { // This is really a union, discriminated by BaseType!
int FrameIndex;
} Base;
+ bool isRIPRel; // RIP relative?
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
Constant *CP;
+ const char *ES;
+ int JT;
unsigned Align; // CP alignment.
X86ISelAddressMode()
- : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
- CP(0), Align(0) {
+ : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+ GV(0), CP(0), ES(0), JT(-1), Align(0) {
}
};
}
Statistic<>
NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
+ Statistic<>
+ NumLoadMoved("x86-codegen", "Number of loads moved below TokenFactor");
+
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
- class X86DAGToDAGISel : public SelectionDAGISel {
+ class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
/// ContainsFPCode - Every instruction we select that uses or defines a FP
/// register should set this to true.
bool ContainsFPCode;
+ /// FastISel - Enable fast(er) instruction selection.
+ ///
+ bool FastISel;
+
+ /// TM - Keep a reference to X86TargetMachine.
+ ///
+ X86TargetMachine &TM;
+
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering X86Lowering;
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
+ /// GlobalBaseReg - keeps track of the virtual register mapped onto global
+ /// base register.
unsigned GlobalBaseReg;
+
public:
- X86DAGToDAGISel(X86TargetMachine &TM)
+ X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
: SelectionDAGISel(X86Lowering),
- X86Lowering(*TM.getTargetLowering()) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
- }
+ ContainsFPCode(false), FastISel(fast), TM(tm),
+ X86Lowering(*TM.getTargetLowering()),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+ virtual bool CanBeFoldedBy(SDNode *N, SDNode *U);
+
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
private:
- void Select(SDOperand &Result, SDOperand N);
+ SDNode *Select(SDOperand N);
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true);
bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
+ void InstructionSelectPreprocess(SelectionDAG &DAG);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
+ char ConstraintCode,
+ std::vector<SDOperand> &OutOps,
+ SelectionDAG &DAG);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+ /// getAddressOperands - Expand the matched addressing mode AM into the four
+ /// operands of a memory reference: Base, Scale, Index and Disp. The outputs
+ /// are written through the reference parameters; AM itself is only read.
inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
- Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
- : (AM.CP ?
- CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
- : getI32Imm(AM.Disp));
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ // The displacement kinds are tried in priority order: global value,
+ // constant pool entry, external symbol, jump table, plain immediate.
+ // NOTE: AM.Disp is only passed along for the global, constant pool and
+ // immediate forms; the external symbol and jump table operands built here
+ // do not receive it.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+ else
+ Disp = getI32Imm(AM.Disp);
}
/// getI8Imm - Return a target constant with the specified value, of type
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
- SDOperand getGlobalBaseReg();
+ SDNode *getGlobalBaseReg();
#ifndef NDEBUG
unsigned Indent;
};
}
+/// findNonImmUse - Recursively walk the operands of Use looking for Def.
+/// Sets 'found' to true as soon as Def shows up as an operand of a visited
+/// node. Visited remembers every node already explored so that each node is
+/// expanded at most once.
+static void findNonImmUse(SDNode* Use, SDNode* Def, bool &found,
+                          std::set<SDNode *> &Visited) {
+  // A path has already been discovered; nothing more to do.
+  if (found)
+    return;
+  // Nodes with an id above Def's are not explored.
+  // NOTE(review): presumably node ids follow a topological order, so such a
+  // node cannot reach Def - confirm against the id assignment code.
+  if (Use->getNodeId() > Def->getNodeId())
+    return;
+  // Skip nodes that were expanded earlier in this search.
+  if (!Visited.insert(Use).second)
+    return;
+
+  const unsigned NumOps = Use->getNumOperands();
+  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+    SDNode *Operand = Use->getOperand(OpIdx).Val;
+    if (Operand == Def) {
+      found = true;
+      return;
+    }
+    findNonImmUse(Operand, Def, found, Visited);
+  }
+}
+
+
+/// isNonImmUse - Return true if Def can be reached from Use by going through
+/// at least one intermediate node, i.e. through some operand of Use other
+/// than Def itself.
+static inline bool isNonImmUse(SDNode* Use, SDNode* Def) {
+  std::set<SDNode *> Visited;
+  bool found = false;
+  const unsigned NumOps = Use->getNumOperands();
+  // Stop scanning as soon as one indirect path has been found.
+  for (unsigned OpIdx = 0; OpIdx != NumOps && !found; ++OpIdx) {
+    SDNode *Operand = Use->getOperand(OpIdx).Val;
+    // The direct Use -> Def edge is the immediate use; ignore it.
+    if (Operand == Def)
+      continue;
+    findNonImmUse(Operand, Def, found, Visited);
+  }
+  return found;
+}
+
+
+bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) {
+  // Folding is never attempted when fast instruction selection is enabled.
+  if (FastISel)
+    return false;
+
+  // If U can reach N through some path other than its direct use of N, then
+  // folding N into U would create a cycle. In the diagram below U reaches N
+  // through X; once N is folded into U, X would be both a predecessor and a
+  // successor of U.
+  //
+  //         [ N ]
+  //         ^  ^
+  //         |  |
+  //        /   \---
+  //      /        [X]
+  //      |         ^
+  //     [U]--------|
+  const bool ReachesIndirectly = isNonImmUse(U, N);
+  return !ReachesIndirectly;
+}
+
+/// MoveBelowTokenFactor - Re-chain a load so that it sits below a TokenFactor:
+///   1. every TokenFactor operand that refers to the load is replaced by the
+///      load's own chain operand,
+///   2. the load's chain operand becomes the TokenFactor,
+///   3. the store's chain operand becomes the load's chain result.
+static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
+                                 SDOperand Store, SDOperand TF) {
+  SDNode *TFNode = TF.Val;
+  SDNode *LoadNode = Load.Val;
+
+  // Build the new TokenFactor operand list with the load spliced out.
+  std::vector<SDOperand> NewTFOps;
+  const unsigned NumTFOps = TFNode->getNumOperands();
+  for (unsigned Idx = 0; Idx != NumTFOps; ++Idx) {
+    SDOperand Op = TFNode->getOperand(Idx);
+    if (Op.Val == LoadNode) {
+      NewTFOps.push_back(LoadNode->getOperand(0));
+    } else {
+      NewTFOps.push_back(Op);
+    }
+  }
+
+  // The three updates are applied in this order: TokenFactor, load, store.
+  DAG.UpdateNodeOperands(TF, &NewTFOps[0], NewTFOps.size());
+  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
+  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+                         Store.getOperand(2), Store.getOperand(3));
+}
+
+/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
+/// selector to pick more load-modify-store instructions. This is a common
+/// case:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// / \-
+/// / |
+/// [TokenFactor] [Op]
+/// ^ ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+///
+/// The fact the store's chain operand != load's chain will prevent the
+/// (store (op (load))) instruction from being selected. We can transform it to:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [TokenFactor]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// | \-
+/// | |
+/// | [Op]
+/// | ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+         E = DAG.allnodes_end(); I != E; ++I) {
+    // Only stores whose chain operand is a TokenFactor are candidates.
+    if (I->getOpcode() != ISD::STORE)
+      continue;
+    SDOperand Chain = I->getOperand(0);
+    if (Chain.Val->getOpcode() != ISD::TokenFactor)
+      continue;
+
+    // The stored value must be a non-FP, non-vector value used only by this
+    // store.
+    SDOperand StoredVal = I->getOperand(1);
+    SDOperand StorePtr = I->getOperand(2);
+    MVT::ValueType StoredVT = StoredVal.getValueType();
+    if (MVT::isFloatingPoint(StoredVT) || MVT::isVector(StoredVT))
+      continue;
+    if (!StoredVal.hasOneUse())
+      continue;
+
+    // Pick the load operand of the operation, if there is one. Load stays
+    // null when the stored value is not one of the handled operations or has
+    // no load in an eligible operand position.
+    SDOperand Load;
+    switch (StoredVal.Val->getOpcode()) {
+    // Commutative operations: the load may be either operand; the first
+    // operand is preferred when both are loads.
+    case ISD::ADD:
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::ADDC:
+    case ISD::ADDE: {
+      SDOperand LHS = StoredVal.getOperand(0);
+      SDOperand RHS = StoredVal.getOperand(1);
+      if (LHS.Val->getOpcode() == ISD::LOAD)
+        Load = LHS;
+      else if (RHS.Val->getOpcode() == ISD::LOAD)
+        Load = RHS;
+      break;
+    }
+    // Non-commutative operations: only the first operand may be the load.
+    case ISD::SUB:
+    case ISD::SHL:
+    case ISD::SRA:
+    case ISD::SRL:
+    case ISD::ROTL:
+    case ISD::ROTR:
+    case ISD::SUBC:
+    case ISD::SUBE:
+    case X86ISD::SHLD:
+    case X86ISD::SHRD: {
+      SDOperand LHS = StoredVal.getOperand(0);
+      if (LHS.Val->getOpcode() == ISD::LOAD)
+        Load = LHS;
+      break;
+    }
+    }
+    if (!Load.Val)
+      continue;
+
+    // The load must feed the TokenFactor, have a single use of its value,
+    // read from the address being stored to, and produce the stored type.
+    if (!Load.Val->isOperand(Chain.Val))
+      continue;
+    if (!Load.hasOneUse())
+      continue;
+    if (!(Load.getOperand(1) == StorePtr))
+      continue;
+    if (!(Load.Val->getValueType(0) == StoredVal.getValueType()))
+      continue;
+
+    MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
+    ++NumLoadMoved;
+  }
+}
+
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
DEBUG(BB->dump());
MachineFunction::iterator FirstMBB = BB;
+ if (!FastISel)
+ InstructionSelectPreprocess(DAG);
+
// Codegen the basic block.
#ifndef NDEBUG
DEBUG(std::cerr << "===== Instruction selection begins:\n");
#ifndef NDEBUG
DEBUG(std::cerr << "===== Instruction selection ends:\n");
#endif
- CodeGenMap.clear();
+
DAG.RemoveDeadNodes();
// Emit machine code to BB.
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
-static void EmitSpecialCodeForMain(MachineBasicBlock *BB,
- MachineFrameInfo *MFI) {
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ if (Subtarget->TargetType == X86Subtarget::isCygwin)
+ BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main");
+
// Switch the FPU to 64-bit precision mode for better compatibility and speed.
int CWFrameIdx = MFI->CreateStackObject(2, 2);
addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
/// addressing mode
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot) {
- bool Available = false;
- // If N has already been selected, reuse the result unless in some very
- // specific cases.
- std::map<SDOperand, SDOperand>::iterator CGMI= CodeGenMap.find(N.getValue(0));
- if (CGMI != CodeGenMap.end()) {
- Available = true;
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRel) {
+ if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getValue();
+ if (isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ }
+ return true;
}
+ int id = N.Val->getNodeId();
+ bool Available = isSelected(id);
+
switch (N.getOpcode()) {
default: break;
- case ISD::Constant:
- AM.Disp += cast<ConstantSDNode>(N)->getValue();
- return false;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getValue();
+ if (isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ break;
+ }
case X86ISD::Wrapper:
- // If both base and index components have been picked, we can't fit
- // the result available in the register in the addressing mode. Duplicate
- // GlobalAddress or ConstantPool as displacement.
- if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
+ // If value is available in a register both base and index components have
+ // been picked, we can't fit the result available in the register in the
+ // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
+
+ // Can't fit GV or CP in addressing mode for X86-64 medium or large code
+ // model since the displacement field is 32-bit. Ok for small code model.
+
+ // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
+ // relative addressing mode.
+ if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
+ (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
+ bool isRIP = Subtarget->is64Bit();
+ if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase))
+ break;
if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
if (AM.CP == 0) {
AM.CP = CP->get();
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
+ if (isRIP)
+ AM.isRIPRel = true;
return false;
}
} else if (GlobalAddressSDNode *G =
if (AM.GV == 0) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
+ if (isRIP)
+ AM.isRIPRel = true;
+ return false;
+ }
+ } else if (isRoot && isRIP) {
+ if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
+ AM.ES = S->getSymbol();
+ AM.isRIPRel = true;
+ return false;
+ } else if (JumpTableSDNode *J =
+ dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
+ AM.JT = J->getIndex();
+ AM.isRIPRel = true;
return false;
}
}
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
- AM.Disp += AddVal->getValue() << Val;
+          // Only the add constant is scaled by the shift amount, as in the
+          // replaced 'AM.Disp += AddVal->getValue() << Val'. The parentheses
+          // are required: '+' binds tighter than '<<', so without them the
+          // existing displacement would be shifted as well.
+          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
+          if (isInt32(Disp))
+            AM.Disp = Disp;
+          else
+            // The folded displacement does not fit in 32 bits; keep the whole
+            // shifted value as the index register instead.
+            AM.IndexReg = ShVal;
} else {
AM.IndexReg = ShVal;
}
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
- AM.Disp += AddVal->getValue() * CN->getValue();
+ uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
+ if (isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ Reg = N.Val->getOperand(0);
} else {
Reg = N.Val->getOperand(0);
}
}
break;
}
+
+ case ISD::OR: {
+ if (!Available) {
+ X86ISelAddressMode Backup = AM;
+ // Look for (x << c1) | c2 where (c2 < c1)
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0));
+ if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) {
+ if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
+ AM.Disp = CN->getValue();
+ return false;
+ }
+ }
+ AM = Backup;
+ CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1));
+ if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) {
+ if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
+ AM.Disp = CN->getValue();
+ return false;
+ }
+ }
+ AM = Backup;
+ }
+ break;
+ }
}
// Is the base register already occupied?
if (MatchAddress(N, AM))
return false;
+ MVT::ValueType VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.Val)
- AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.Val)
- AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
+ AM.IndexReg = CurDAG->getRegister(0, VT);
getAddressOperands(AM, Base, Scale, Index, Disp);
-
return true;
}
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-/// For X86, it always is unless it's just a (Reg + const).
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
SDOperand &Scale,
SDOperand &Index, SDOperand &Disp) {
if (MatchAddress(N, AM))
return false;
+ MVT::ValueType VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.Val)
Complexity = 1;
else
- AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.Val)
Complexity++;
else
- AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
+ AM.IndexReg = CurDAG->getRegister(0, VT);
if (AM.Scale > 2)
Complexity += 2;
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
- if (AM.GV || AM.CP)
- Complexity += 2;
+ if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
Complexity++;
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
}
-
return false;
}
SDOperand &Index, SDOperand &Disp) {
if (N.getOpcode() == ISD::LOAD &&
N.hasOneUse() &&
- !CodeGenMap.count(N.getValue(0)) &&
- (P.getNumOperands() == 1 || !isNonImmUse(P.Val, N.Val)))
+ P.Val->isOnlyUse(N.Val) &&
+ CanBeFoldedBy(N.Val, P.Val))
return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
return false;
}
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
-SDOperand X86DAGToDAGISel::getGlobalBaseReg() {
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = BB->getParent()->front();
BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
}
- return CurDAG->getRegister(GlobalBaseReg, MVT::i32);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}
static SDNode *FindCallStartFromCall(SDNode *Node) {
return FindCallStartFromCall(Node->getOperand(0).Val);
}
-void X86DAGToDAGISel::Select(SDOperand &Result, SDOperand N) {
+SDNode *X86DAGToDAGISel::Select(SDOperand N) {
SDNode *Node = N.Val;
MVT::ValueType NVT = Node->getValueType(0);
unsigned Opc, MOpc;
#endif
if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
- Result = N;
#ifndef NDEBUG
DEBUG(std::cerr << std::string(Indent-2, ' '));
DEBUG(std::cerr << "== ");
DEBUG(std::cerr << "\n");
Indent -= 2;
#endif
- return; // Already selected.
+ return NULL; // Already selected.
}
- std::map<SDOperand, SDOperand>::iterator CGMI = CodeGenMap.find(N);
- if (CGMI != CodeGenMap.end()) {
- Result = CGMI->second;
-#ifndef NDEBUG
- DEBUG(std::cerr << std::string(Indent-2, ' '));
- DEBUG(std::cerr << "== ");
- DEBUG(Result.Val->dump(CurDAG));
- DEBUG(std::cerr << "\n");
- Indent -= 2;
-#endif
- return;
- }
-
switch (Opcode) {
default: break;
case X86ISD::GlobalBaseReg:
- Result = getGlobalBaseReg();
- return;
+ return getGlobalBaseReg();
case ISD::ADD: {
// Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
// code and is matched first so to prevent it from being turned into
// LEA32r X+c.
+ // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
+ MVT::ValueType PtrVT = TLI.getPointerTy();
SDOperand N0 = N.getOperand(0);
SDOperand N1 = N.getOperand(1);
- if (N.Val->getValueType(0) == MVT::i32 &&
+ if (N.Val->getValueType(0) == PtrVT &&
N0.getOpcode() == X86ISD::Wrapper &&
N1.getOpcode() == ISD::Constant) {
unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
// TODO: handle ExternalSymbolSDNode.
if (GlobalAddressSDNode *G =
dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
- C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
+ C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
G->getOffset() + Offset);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
- C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
+ C = CurDAG->getTargetConstantPool(CP->get(), PtrVT,
CP->getAlignment(),
CP->getOffset()+Offset);
}
if (C.Val) {
- if (N.Val->hasOneUse()) {
- Result = CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C);
- } else {
- SDNode *ResNode = CurDAG->getTargetNode(X86::MOV32ri, MVT::i32, C);
- Result = CodeGenMap[N] = SDOperand(ResNode, 0);
- }
- return;
+ if (Subtarget->is64Bit()) {
+ SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
+ CurDAG->getRegister(0, PtrVT), C };
+ return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
+ } else
+ return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
}
}
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
else
switch (NVT) {
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
unsigned LoReg, HiReg;
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
}
SDOperand N0 = Node->getOperand(0);
}
SDOperand Chain;
- if (foldedLoad)
- Select(Chain, N1.getOperand(0));
- else
+ if (foldedLoad) {
+ Chain = N1.getOperand(0);
+ AddToISelQueue(Chain);
+ } else
Chain = CurDAG->getEntryNode();
SDOperand InFlag(0, 0);
- Select(N0, N0);
+ AddToISelQueue(N0);
Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
N0, InFlag);
InFlag = Chain.getValue(1);
if (foldedLoad) {
- Select(Tmp0, Tmp0);
- Select(Tmp1, Tmp1);
- Select(Tmp2, Tmp2);
- Select(Tmp3, Tmp3);
+ AddToISelQueue(Tmp0);
+ AddToISelQueue(Tmp1);
+ AddToISelQueue(Tmp2);
+ AddToISelQueue(Tmp3);
+ SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
SDNode *CNode =
- CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1,
- Tmp2, Tmp3, Chain, InFlag);
+ CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
Chain = SDOperand(CNode, 0);
InFlag = SDOperand(CNode, 1);
} else {
- Select(N1, N1);
+ AddToISelQueue(N1);
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
- Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
- CodeGenMap[N.getValue(0)] = Result;
- if (foldedLoad) {
- CodeGenMap[N1.getValue(1)] = Result.getValue(1);
- AddHandleReplacement(N1.Val, 1, Result.Val, 1);
- }
+ SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
+ ReplaceUses(N.getValue(0), Result);
+ if (foldedLoad)
+ ReplaceUses(N1.getValue(1), Result.getValue(1));
#ifndef NDEBUG
DEBUG(std::cerr << std::string(Indent-2, ' '));
- DEBUG(std::cerr << "== ");
+ DEBUG(std::cerr << "=> ");
DEBUG(Result.Val->dump(CurDAG));
DEBUG(std::cerr << "\n");
Indent -= 2;
#endif
- return;
+ return NULL;
}
case ISD::SDIV:
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
else
switch (NVT) {
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
unsigned LoReg, HiReg;
default: assert(0 && "Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; HiReg = X86::AH;
- ClrOpcode = X86::MOV8ri;
+ ClrOpcode = X86::MOV8r0;
SExtOpcode = X86::CBW;
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV16ri;
+ ClrOpcode = X86::MOV16r0;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
LoReg = X86::EAX; HiReg = X86::EDX;
- ClrOpcode = X86::MOV32ri;
+ ClrOpcode = X86::MOV32r0;
SExtOpcode = X86::CDQ;
break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
}
SDOperand N0 = Node->getOperand(0);
SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
SDOperand Chain;
- if (foldedLoad)
- Select(Chain, N1.getOperand(0));
- else
+ if (foldedLoad) {
+ Chain = N1.getOperand(0);
+ AddToISelQueue(Chain);
+ } else
Chain = CurDAG->getEntryNode();
SDOperand InFlag(0, 0);
- Select(N0, N0);
+ AddToISelQueue(N0);
Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
N0, InFlag);
InFlag = Chain.getValue(1);
SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDOperand ClrNode =
- SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT,
- CurDAG->getTargetConstant(0, NVT)), 0);
+ SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
ClrNode, InFlag);
InFlag = Chain.getValue(1);
}
if (foldedLoad) {
- Select(Tmp0, Tmp0);
- Select(Tmp1, Tmp1);
- Select(Tmp2, Tmp2);
- Select(Tmp3, Tmp3);
+ AddToISelQueue(Tmp0);
+ AddToISelQueue(Tmp1);
+ AddToISelQueue(Tmp2);
+ AddToISelQueue(Tmp3);
+ SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
SDNode *CNode =
- CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Tmp0, Tmp1,
- Tmp2, Tmp3, Chain, InFlag);
+ CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
Chain = SDOperand(CNode, 0);
InFlag = SDOperand(CNode, 1);
} else {
- Select(N1, N1);
+ AddToISelQueue(N1);
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
- Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
- NVT, InFlag);
- CodeGenMap[N.getValue(0)] = Result;
- if (foldedLoad) {
- CodeGenMap[N1.getValue(1)] = Result.getValue(1);
- AddHandleReplacement(N1.Val, 1, Result.Val, 1);
- }
+ SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
+ NVT, InFlag);
+ ReplaceUses(N.getValue(0), Result);
+ if (foldedLoad)
+ ReplaceUses(N1.getValue(1), Result.getValue(1));
#ifndef NDEBUG
DEBUG(std::cerr << std::string(Indent-2, ' '));
- DEBUG(std::cerr << "== ");
+ DEBUG(std::cerr << "=> ");
DEBUG(Result.Val->dump(CurDAG));
DEBUG(std::cerr << "\n");
Indent -= 2;
#endif
- return;
+
+ return NULL;
}
case ISD::TRUNCATE: {
- if (NVT == MVT::i8) {
+ if (!Subtarget->is64Bit() && NVT == MVT::i8) {
unsigned Opc2;
MVT::ValueType VT;
switch (Node->getOperand(0).getValueType()) {
case MVT::i16:
Opc = X86::MOV16to16_;
VT = MVT::i16;
- Opc2 = X86::TRUNC_GR16_GR8;
+ Opc2 = X86::TRUNC_16_to8;
break;
case MVT::i32:
Opc = X86::MOV32to32_;
VT = MVT::i32;
- Opc2 = X86::TRUNC_GR32_GR8;
+ Opc2 = X86::TRUNC_32_to8;
break;
}
- SDOperand Tmp0, Tmp1;
- Select(Tmp0, Node->getOperand(0));
- Tmp1 = SDOperand(CurDAG->getTargetNode(Opc, VT, Tmp0), 0);
- Result = CodeGenMap[N] =
- SDOperand(CurDAG->getTargetNode(Opc2, NVT, Tmp1), 0);
+ AddToISelQueue(Node->getOperand(0));
+ SDOperand Tmp =
+ SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
+ SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
#ifndef NDEBUG
DEBUG(std::cerr << std::string(Indent-2, ' '));
- DEBUG(std::cerr << "== ");
- DEBUG(Result.Val->dump(CurDAG));
+ DEBUG(std::cerr << "=> ");
+ DEBUG(ResNode->dump(CurDAG));
DEBUG(std::cerr << "\n");
Indent -= 2;
#endif
- return;
+ return ResNode;
}
break;
}
-
- case X86ISD::CALL:
- case X86ISD::TAILCALL: {
- // Handle indirect call which folds a load here. This never matches by
- // the TableGen generated code since the load's chain result is read by
- // the callseq_start node or by a TokenFactor which feeds into the
- // callseq_start.
- SDOperand N1 = Node->getOperand(1);
- if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() &&
- !CodeGenMap.count(N1.getValue(0))) {
- SDOperand Chain = Node->getOperand(0);
- SDNode *CallStart = FindCallStartFromCall(Chain.Val);
- if (!CallStart) break;
- SDNode *CSOp0 = CallStart->getOperand(0).Val;
- if (! (CSOp0 == N1.Val ||
- (CSOp0->getOpcode() == ISD::TokenFactor &&
- N1.Val->isOperand(CSOp0))))
- break;
- SDOperand Base, Scale, Index, Disp;
- if (SelectAddr(N1.getOperand(1), Base, Scale, Index, Disp)) {
- Select(Base, Base);
- Select(Scale, Scale);
- Select(Index, Index);
- Select(Disp, Disp);
- Select(Chain, Chain);
- bool HasOptInFlag = false;
- SDOperand InFlag;
- if (N.getNumOperands() == 3) {
- Select(InFlag, N.getOperand(2));
- HasOptInFlag = true;
- }
- SDNode *ResNode;
- if (HasOptInFlag)
- ResNode = CurDAG->getTargetNode(X86::CALL32m, MVT::Other, MVT::Flag,
- Base, Scale, Index, Disp, Chain,
- InFlag);
- else
- ResNode = CurDAG->getTargetNode(X86::CALL32m, MVT::Other, MVT::Flag,
- Base, Scale, Index, Disp, Chain);
-
- SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 0, Chain.Val,
- Chain.ResNo);
- SelectionDAG::InsertISelMapEntry(CodeGenMap, N.Val, 1, ResNode, 1);
- // CALLSEQ_START needs a chain! It can't be ResNode, that would cause
- // a cycle. It should be the chain of the load.
- Select(Chain, N1.getOperand(0));
- SelectionDAG::InsertISelMapEntry(CodeGenMap, N1.Val, 1, Chain.Val,
- Chain.ResNo);
- AddHandleReplacement(N1.Val, 1, Chain.Val, Chain.ResNo);
- Result = SDOperand(ResNode, 0);
-
-#ifndef NDEBUG
- DEBUG(std::cerr << std::string(Indent-2, ' '));
- DEBUG(std::cerr << "== ");
- DEBUG(Result.Val->dump(CurDAG));
- DEBUG(std::cerr << "\n");
- Indent -= 2;
-#endif
- return;
- }
- }
- break;
- }
}
- SelectCode(Result, N);
+ SDNode *ResNode = SelectCode(N);
+
#ifndef NDEBUG
DEBUG(std::cerr << std::string(Indent-2, ' '));
DEBUG(std::cerr << "=> ");
- DEBUG(Result.Val->dump(CurDAG));
+ if (ResNode == NULL || ResNode == N.Val)
+ DEBUG(N.Val->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
DEBUG(std::cerr << "\n");
Indent -= 2;
#endif
+
+ return ResNode;
+}
+
+/// Select the addressing mode for an inline asm memory operand. On success
+/// the four address operands are appended to OutOps, queued for selection,
+/// and false is returned. Returns true when the constraint is not handled or
+/// the address cannot be matched.
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
+                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
+  // Only the plain memory constraint 'm' is handled; everything else,
+  // including 'o' and 'v', is rejected.
+  if (ConstraintCode != 'm')
+    return true;
+
+  SDOperand Base, Scale, Index, Disp;
+  if (!SelectAddr(Op, Base, Scale, Index, Disp))
+    return true;
+
+  SDOperand AddrOps[] = { Base, Scale, Index, Disp };
+  const unsigned NumAddrOps = 4;
+  // First hand all operands back to the caller, then queue them.
+  for (unsigned Idx = 0; Idx != NumAddrOps; ++Idx)
+    OutOps.push_back(AddrOps[Idx]);
+  for (unsigned Idx = 0; Idx != NumAddrOps; ++Idx)
+    AddToISelQueue(AddrOps[Idx]);
+  return false;
}
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM) {
- return new X86DAGToDAGISel(TM);
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
+ // Fast is stored as the selector's FastISel flag. When set, the selector
+ // skips InstructionSelectPreprocess and CanBeFoldedBy always returns false.
+ // The returned pass is heap allocated; the caller receives the raw pointer.
+ return new X86DAGToDAGISel(TM, Fast);
}