//
//===----------------------------------------------------------------------===//
-// Force NDEBUG on in any optimized build on Darwin.
-//
-// FIXME: This is a huge hack, to work around ridiculously awful compile times
-// on this file with gcc-4.2 on Darwin, in Release mode.
-#if (!defined(__llvm__) && defined(__APPLE__) && \
- defined(__OPTIMIZE__) && !defined(NDEBUG))
-#define NDEBUG
-#endif
-
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
-#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
FrameIndexBase
} BaseType;
- struct { // This is really a union, discriminated by BaseType!
- SDValue Reg;
- int FrameIndex;
- } Base;
+ // This is really a union, discriminated by BaseType!
+ SDValue Base_Reg;
+ int Base_FrameIndex;
unsigned Scale;
SDValue IndexReg;
int32_t Disp;
SDValue Segment;
- GlobalValue *GV;
- Constant *CP;
- BlockAddress *BlockAddr;
+ const GlobalValue *GV;
+ const Constant *CP;
+ const BlockAddress *BlockAddr;
const char *ES;
int JT;
unsigned Align; // CP alignment.
unsigned char SymbolFlags; // X86II::MO_*
X86ISelAddressMode()
- : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
SymbolFlags(X86II::MO_NO_FLAG) {
}
}
bool hasBaseOrIndexReg() const {
- return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
+ return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
/// isRIPRelative - Return true if this addressing mode is already RIP
bool isRIPRelative() const {
if (BaseType != RegBase) return false;
if (RegisterSDNode *RegNode =
- dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
+ dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
return RegNode->getReg() == X86::RIP;
return false;
}
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
- Base.Reg = Reg;
+ Base_Reg = Reg;
}
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
- dbgs() << "Base.Reg ";
- if (Base.Reg.getNode() != 0)
- Base.Reg.getNode()->dump();
+ dbgs() << "Base_Reg ";
+ if (Base_Reg.getNode() != 0)
+ Base_Reg.getNode()->dump();
else
dbgs() << "nul";
- dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
+ dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
<< " Scale" << Scale << '\n'
<< "IndexReg ";
if (IndexReg.getNode() != 0)
class X86DAGToDAGISel : public SelectionDAGISel {
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
- X86TargetLowering &X86Lowering;
+ const X86TargetLowering &X86Lowering;
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
return "X86 DAG->DAG Instruction Selection";
}
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
+ virtual void EmitFunctionEntryCode();
- virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+ virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
- virtual
- bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
+ virtual void PreprocessISelDAG();
+
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
- bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
- bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,
- SDValue N, SDValue &Base, SDValue &Scale,
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
- SDValue &InChain, SDValue &OutChain);
+ SDValue &NodeWithChain);
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
- void PreprocessForRMW();
- void PreprocessForFPConvert();
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
- AM.Base.Reg;
+ CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
+ AM.Base_Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
- Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
AM.SymbolFlags);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
- /// getI16Imm - Return a target constant with the specified value, of type
- /// i16.
- inline SDValue getI16Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i16);
- }
-
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
const X86InstrInfo *getInstrInfo() {
return getTargetMachine().getInstrInfo();
}
-
-#ifndef NDEBUG
- unsigned Indent;
-#endif
};
}
-bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
- SDNode *Root) const {
+bool
+X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (OptLevel == CodeGenOpt::None) return false;
- if (U == Root)
+ if (!N.hasOneUse())
+ return false;
+
+ if (N.getOpcode() != ISD::LOAD)
+ return true;
+
+ // If N is a load, do additional profitability checks.
+ if (U == Root) {
switch (U->getOpcode()) {
default: break;
case X86ISD::ADD:
}
}
}
-
- // Proceed to 'generic' cycle finder code
- return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
-}
-
-/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result.
-static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
- if (Load.getNode() == TF.getOperand(i).getNode())
- Ops.push_back(Load.getOperand(0));
- else
- Ops.push_back(TF.getOperand(i));
- SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
- Load.getOperand(1),
- Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The
-/// chain produced by the load must only be used by the store's chain operand,
-/// otherwise this may produce a cycle in the DAG.
-///
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
- SDValue &Load) {
- if (N.getOpcode() == ISD::BIT_CONVERT)
- N = N.getOperand(0);
-
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD || LD->isVolatile())
- return false;
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
- return false;
-
- if (N.hasOneUse() &&
- LD->hasNUsesOfValue(1, 1) &&
- N.getOperand(1) == Address &&
- LD->isOperandOf(Chain.getNode())) {
- Load = N;
- return true;
}
- return false;
+
+ return true;
}
-/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
-/// operand and move load below the call's chain operand.
-static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
- SDValue Call, SDValue CallSeqStart) {
+/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
+/// load's chain operand and move load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
- SDValue Chain = CallSeqStart.getOperand(0);
+ SDValue Chain = OrigChain.getOperand(0);
if (Chain.getNode() == Load.getNode())
Ops.push_back(Load.getOperand(0));
else {
assert(Chain.getOpcode() == ISD::TokenFactor &&
- "Unexpected CallSeqStart chain operand");
+ "Unexpected chain operand");
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
if (Chain.getOperand(i).getNode() == Load.getNode())
Ops.push_back(Load.getOperand(0));
Ops.clear();
Ops.push_back(NewChain);
}
- for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
- Ops.push_back(CallSeqStart.getOperand(i));
- CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
- CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
+ for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+ Ops.push_back(OrigChain.getOperand(i));
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
Ops.push_back(Call.getOperand(i));
- CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
}
/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
-static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
return false;
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
return false;
// Now let's find the callseq_start.
- while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+ while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
if (!Chain.hasOneUse())
return false;
Chain = Chain.getOperand(0);
}
-
+
+ if (!Chain.getNumOperands())
+ return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
return false;
}
-
-/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
-/// This is only run if not in -O0 mode.
-/// This allows the instruction selector to pick more read-modify-write
-/// instructions. This is a common case:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// / \-
-/// / |
-/// [TokenFactor] [Op]
-/// ^ ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// The fact the store's chain operand != load's chain will prevent the
-/// (store (op (load))) instruction from being selected. We can transform it to:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// | \-
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-void X86DAGToDAGISel::PreprocessForRMW() {
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ // OptForSize is used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- if (I->getOpcode() == X86ISD::CALL) {
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None &&
+ (N->getOpcode() == X86ISD::CALL ||
+ N->getOpcode() == X86ISD::TC_RETURN)) {
/// Also try moving call address load from outside callseq_start to just
/// before the call to allow it to be folded.
///
/// \ /
/// \ /
/// [CALL]
- SDValue Chain = I->getOperand(0);
- SDValue Load = I->getOperand(1);
- if (!isCalleeLoad(Load, Chain))
+ bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
+ if (!isCalleeLoad(Load, Chain, HasCallSeq))
continue;
- MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
+ MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}
-
- if (!ISD::isNON_TRUNCStore(I))
- continue;
- SDValue Chain = I->getOperand(0);
-
- if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
- continue;
-
- SDValue N1 = I->getOperand(1);
- SDValue N2 = I->getOperand(2);
- if ((N1.getValueType().isFloatingPoint() &&
- !N1.getValueType().isVector()) ||
- !N1.hasOneUse())
- continue;
-
- bool RModW = false;
- SDValue Load;
- unsigned Opcode = N1.getNode()->getOpcode();
- switch (Opcode) {
- case ISD::ADD:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::VECTOR_SHUFFLE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- RModW = isRMWLoad(N10, Chain, N2, Load);
- if (!RModW)
- RModW = isRMWLoad(N11, Chain, N2, Load);
- break;
- }
- case ISD::SUB:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL:
- case ISD::ROTR:
- case ISD::SUBC:
- case ISD::SUBE:
- case X86ISD::SHLD:
- case X86ISD::SHRD: {
- SDValue N10 = N1.getOperand(0);
- RModW = isRMWLoad(N10, Chain, N2, Load);
- break;
- }
- }
-
- if (RModW) {
- MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
- ++NumLoadMoved;
- }
- }
-}
-
-
-/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
-/// nodes that target the FP stack to be store and load to the stack. This is a
-/// gross hack. We would like to simply mark these as being illegal, but when
-/// we do that, legalize produces these when it expands calls, then expands
-/// these in the same legalize pass. We would like dag combine to be able to
-/// hack on these between the call expansion and the node legalization. As such
-/// this pass basically does "really late" legalization of these inline with the
-/// X86 isel pass.
-void X86DAGToDAGISel::PreprocessForFPConvert() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ // Lower fpround and fpextend nodes that target the FP stack to be store and
+ // load to the stack. This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
- MemTmp, NULL, 0, MemVT);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
- NULL, 0, MemVT);
+ MemTmp, MachinePointerInfo(), MemVT,
+ false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
}
}
-/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
-/// when it has created a SelectionDAG for us to codegen.
-void X86DAGToDAGISel::InstructionSelect() {
- const Function *F = MF->getFunction();
- OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
-
- if (OptLevel != CodeGenOpt::None)
- PreprocessForRMW();
-
- // FIXME: This should only happen when not compiled with -O0.
- PreprocessForFPConvert();
-
- // Codegen the basic block.
-#ifndef NDEBUG
- DEBUG(dbgs() << "===== Instruction selection begins:\n");
- Indent = 0;
-#endif
- SelectRoot(*CurDAG);
-#ifndef NDEBUG
- DEBUG(dbgs() << "===== Instruction selection ends:\n");
-#endif
-
- CurDAG->RemoveDeadNodes();
-}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing())
- BuildMI(BB, DebugLoc::getUnknownLoc(),
+ BuildMI(BB, DebugLoc(),
TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}
-void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
+void X86DAGToDAGISel::EmitFunctionEntryCode() {
// If this is main, emit special code for main.
- MachineBasicBlock *BB = MF.begin();
- if (Fn.hasExternalLinkage() && Fn.getName() == "main")
- EmitSpecialCodeForMain(BB, MF.getFrameInfo());
+ if (const Function *Fn = MF->getFunction())
+ if (Fn->hasExternalLinkage() && Fn->getName() == "main")
+ EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}
-bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
- X86ISelAddressMode &AM) {
- assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
- SDValue Segment = N.getOperand(0);
-
- if (AM.Segment.getNode() == 0) {
- AM.Segment = Segment;
- return false;
- }
-
- return true;
-}
-
-bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
// This optimization is valid because the GNU TLS model defines that
// gs:0 (or fs:0 on X86-64) contains its own address.
// For more information see http://people.redhat.com/drepper/tls.pdf
-
- SDValue Address = N.getOperand(1);
- if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
- !MatchSegmentBaseAddress (Address, AM))
- return false;
-
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetELF())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
return true;
}
// a smaller encoding and avoids a scaled-index.
if (AM.Scale == 2 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0) {
- AM.Base.Reg = AM.IndexReg;
+ AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg = AM.IndexReg;
AM.Scale = 1;
}
Subtarget->is64Bit() &&
AM.Scale == 1 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0 &&
+ AM.Base_Reg.getNode() == 0 &&
AM.IndexReg.getNode() == 0 &&
AM.SymbolFlags == X86II::MO_NO_FLAG &&
AM.hasSymbolicDisplacement())
- AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
return false;
}
+/// isLogicallyAddWithConstant - Return true if this node is semantically an
+/// add of a value with a constantint.
+static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
+ // Check for (add x, Cst)
+ if (V->getOpcode() == ISD::ADD)
+ return isa<ConstantSDNode>(V->getOperand(1));
+
+ // Check for (or x, Cst), where Cst & x == 0.
+ if (V->getOpcode() != ISD::OR ||
+ !isa<ConstantSDNode>(V->getOperand(1)))
+ return false;
+
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
+
+ // Check to see if the LHS & C is zero.
+ return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
+}
+
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
break;
}
- case X86ISD::SegmentBaseAddress:
- if (!MatchSegmentBaseAddress(N, AM))
- return false;
- break;
-
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
if (!MatchWrapper(N, AM))
break;
case ISD::LOAD:
- if (!MatchLoad(N, AM))
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
return false;
break;
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase
- && AM.Base.Reg.getNode() == 0) {
+ && AM.Base_Reg.getNode() == 0) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
- AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
break;
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
- if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
- isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
+ if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
case X86ISD::MUL_IMM:
// X*[3,5,9] -> X+X*[2,4,8]
if (AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0 &&
+ AM.Base_Reg.getNode() == 0 &&
AM.IndexReg.getNode() == 0) {
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
Reg = N.getNode()->getOperand(0);
}
- AM.IndexReg = AM.Base.Reg = Reg;
+ AM.IndexReg = AM.Base_Reg = Reg;
return false;
}
}
// other uses, since it avoids a two-address sub instruction, however
// it costs an additional mov if the index register has other uses.
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
+
int Cost = 0;
- SDValue RHS = N.getNode()->getOperand(1);
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
// If the RHS involves a register with multiple uses, this
// transformation incurs an extra mov, due to the neg instruction
// clobbering its operand.
// If the base is a register with multiple uses, this
// transformation may save a mov.
if ((AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base.Reg.getNode() &&
- !AM.Base.Reg.getNode()->hasOneUse()) ||
+ AM.Base_Reg.getNode() &&
+ !AM.Base_Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
// If the folded LHS was interesting, this transformation saves
}
case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+ SDValue LHS = Handle.getValue().getNode()->getOperand(0);
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+
X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
- !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
+ if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
+ !MatchAddressRecursively(RHS, AM, Depth+1))
return false;
AM = Backup;
- if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
- !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
+ LHS = Handle.getValue().getNode()->getOperand(0);
+ RHS = Handle.getValue().getNode()->getOperand(1);
+
+ // Try again after commuting the operands.
+ if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
+ !MatchAddressRecursively(LHS, AM, Depth+1))
return false;
AM = Backup;
+ LHS = Handle.getValue().getNode()->getOperand(0);
+ RHS = Handle.getValue().getNode()->getOperand(1);
// If we couldn't fold both operands into the address at the same time,
// see if we can just put each operand into a register and fold at least
// the add.
if (AM.BaseType == X86ISelAddressMode::RegBase &&
- !AM.Base.Reg.getNode() &&
+ !AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base.Reg = N.getNode()->getOperand(0);
- AM.IndexReg = N.getNode()->getOperand(1);
+ AM.Base_Reg = LHS;
+ AM.IndexReg = RHS;
AM.Scale = 1;
return false;
}
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ if (isLogicallyAddWithConstant(N, CurDAG)) {
X86ISelAddressMode Backup = AM;
+ ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
uint64_t Offset = CN->getSExtValue();
+
// Start with the LHS as an addr mode.
if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
// Address could not have picked a GV address for the displacement.
// On x86-64, the resultant disp must fit in 32-bits.
(!is64Bit ||
X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
- AM.hasSymbolicDisplacement())) &&
- // Check to see if the LHS & C is zero.
- CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.hasSymbolicDisplacement()))) {
AM.Disp += Offset;
return false;
}
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
// Is the base register already occupied?
- if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
// If so, check to see if the scale index register is set.
if (AM.IndexReg.getNode() == 0) {
AM.IndexReg = N;
// Default, generate it as a register.
AM.BaseType = X86ISelAddressMode::RegBase;
- AM.Base.Reg = N;
+ AM.Base_Reg = N;
return false;
}
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
+
+ if (Parent &&
+ // This list of opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
if (MatchAddress(N, AM))
return false;
EVT VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
- if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
+ if (!AM.Base_Reg.getNode())
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.getNode())
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
+///
+/// We also return:
+/// PatternChainNode: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
- SDValue &InChain,
- SDValue &OutChain) {
+ SDValue &PatternNodeWithChain) {
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- InChain = N.getOperand(0).getValue(1);
- if (ISD::isNON_EXTLoad(InChain.getNode()) &&
- InChain.getValue(0).hasOneUse() &&
- N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) {
- LoadSDNode *LD = cast<LoadSDNode>(InChain);
- if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
- OutChain = LD->getChain();
return true;
}
}
N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
- N.getOperand(0).getOperand(0).hasOneUse()) {
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
- OutChain = LD->getChain();
- InChain = SDValue(LD, 1);
+ PatternNodeWithChain = SDValue(LD, 0);
return true;
}
return false;
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp) {
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
X86ISelAddressMode AM;
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
EVT VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
- if (AM.Base.Reg.getNode())
+ if (AM.Base_Reg.getNode())
Complexity = 1;
else
- AM.Base.Reg = CurDAG->getRegister(0, VT);
+ AM.Base_Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
Complexity += 2;
}
- if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
Complexity++;
// If it isn't worth using an LEA, reject it.
if (Complexity <= 2)
return false;
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
- SDValue &Disp) {
- assert(Op->getOpcode() == X86ISD::TLSADDR);
+ SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
- AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
+ AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
AM.SymbolFlags = GA->getTargetFlags();
if (N.getValueType() == MVT::i32) {
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
- if (ISD::isNON_EXTLoad(N.getNode()) &&
- N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), P, P))
- return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
- return false;
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P, OptLevel))
+ return false;
+
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
/// getGlobalBaseReg - Return an SDNode that returns the value of
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-static SDNode *FindCallStartFromCall(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCall(Node->getOperand(0).getNode());
-}
-
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return 0;
bool isInc = false, isDec = false, isSub = false, isCN = false;
Opc = X86::LOCK_DEC16m;
else if (isSub) {
if (isCN) {
- if (Predicate_i16immSExt8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB16mi8;
else
Opc = X86::LOCK_SUB16mi;
Opc = X86::LOCK_SUB16mr;
} else {
if (isCN) {
- if (Predicate_i16immSExt8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD16mi8;
else
Opc = X86::LOCK_ADD16mi;
Opc = X86::LOCK_DEC32m;
else if (isSub) {
if (isCN) {
- if (Predicate_i32immSExt8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB32mi8;
else
Opc = X86::LOCK_SUB32mi;
Opc = X86::LOCK_SUB32mr;
} else {
if (isCN) {
- if (Predicate_i32immSExt8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD32mi8;
else
Opc = X86::LOCK_ADD32mi;
else if (isSub) {
Opc = X86::LOCK_SUB64mr;
if (isCN) {
- if (Predicate_i64immSExt8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_SUB64mi32;
}
} else {
Opc = X86::LOCK_ADD64mr;
if (isCN) {
- if (Predicate_i64immSExt8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_ADD64mi32;
}
}
}
DebugLoc dl = Node->getDebugLoc();
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
- case X86::JA: case X86::JAE: case X86::JB: case X86::JBE:
- case X86::JE: case X86::JNE: case X86::JP: case X86::JNP:
+ case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
+ case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
case X86::CMOVA16rr: case X86::CMOVA16rm:
case X86::CMOVA32rr: case X86::CMOVA32rm:
case X86::CMOVA64rr: case X86::CMOVA64rm:
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent, ' ') << "Selecting: ";
- Node->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent += 2;
-#endif
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "== ";
- Node->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent -= 2;
-#endif
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
return NULL; // Already selected.
}
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
+ // Look past the truncate if CMP is the only use of it.
+ N0 = N0.getOperand(0);
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8 &&
X86::isZeroNode(N1)) {
}
// Extract the l-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Reg);
// Emit a testb.
Reg.getValueType(), Reg, RC), 0);
// Extract the h-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
// Emit a testb. No special NOREX tricks are needed since there's
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 16-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
MVT::i16, Reg);
// Emit a testw.
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 32-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);
// Emit a testl.
SDNode *ResNode = SelectCode(Node);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent -= 2;
-#endif
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
return ResNode;
}
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}