#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
- SDValue Call, SDValue OrigChain) {
+ SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
SDValue Chain = OrigChain.getOperand(0);
if (Chain.getNode() == Load.getNode())
CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
+
+ unsigned NumOps = Call.getNode()->getNumOperands();
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
- for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ for (unsigned i = 1, e = NumOps; i != e; ++i)
Ops.push_back(Call.getOperand(i));
- CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
}
/// isCalleeLoad - Return true if call address is a load and it can be
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
+ // The transformation is somewhat dangerous if the call's chain was glued to
+ // the call. After MoveBelowOrigChain the load is moved between the call and
+ // the chain, this can create a cycle if the load is not folded. So it is
+ // *really* important that we are sure the load will be folded.
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
return false;
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
if (!Chain.getNumOperands())
return false;
+ // Since we are not checking for AA here, conservatively abort if the chain
+ // writes to memory. It's not safe to move the callee (a load) across a store.
+ if (isa<MemSDNode>(Chain.getNode()) &&
+ cast<MemSDNode>(Chain.getNode())->writeMem())
+ return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ OptForSize = MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
if (OptLevel != CodeGenOpt::None &&
(N->getOpcode() == X86ISD::CALL ||
- N->getOpcode() == X86ISD::TC_RETURN)) {
+ (N->getOpcode() == X86ISD::TC_RETURN &&
+ // Only does this if load can be folded into TC_RETURN.
+ (Subtarget->is64Bit() ||
+ getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
/// Also try moving call address load from outside callseq_start to just
/// before the call to allow it to be folded.
///
AM.IndexReg = ShVal;
return false;
}
- break;
}
+ break;
case ISD::SRL: {
// Scale must not be used already.
// that are not a MemSDNode, and thus don't have proper addrspace info.
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
- Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+ Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
+ Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
// AddrSpace 256 -> GS, 257 -> FS.
case X86ISD::ATOMSUB64_DAG:
case X86ISD::ATOMNAND64_DAG:
case X86ISD::ATOMAND64_DAG:
+ case X86ISD::ATOMMAX64_DAG:
+ case X86ISD::ATOMMIN64_DAG:
+ case X86ISD::ATOMUMAX64_DAG:
+ case X86ISD::ATOMUMIN64_DAG:
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
+ case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
+ case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
+ case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
}
SDNode *RetVal = SelectAtomic64(Node, Opc);
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
+ bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
+ MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
+ case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
+ MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.getSimpleVT().SimpleTy) {
}
}
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ unsigned SrcReg, LoReg, HiReg;
+ switch (Opc) {
+ default: llvm_unreachable("Unknown MUL opcode!");
+ case X86::IMUL8r:
+ case X86::MUL8r:
+ SrcReg = LoReg = X86::AL; HiReg = X86::AH;
+ break;
+ case X86::IMUL16r:
+ case X86::MUL16r:
+ SrcReg = LoReg = X86::AX; HiReg = X86::DX;
+ break;
+ case X86::IMUL32r:
+ case X86::MUL32r:
+ SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ break;
+ case X86::IMUL64r:
+ case X86::MUL64r:
+ SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ break;
+ case X86::MULX32rr:
+ SrcReg = X86::EDX; LoReg = HiReg = 0;
+ break;
+ case X86::MULX64rr:
+ SrcReg = X86::RDX; LoReg = HiReg = 0;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
if (foldedLoad) {
+ SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
+ if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ InFlag = SDValue(CNode, 3);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ ReplaceUses(N1.getValue(1), Chain);
} else {
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
- InFlag = SDValue(CNode, 0);
+ SDValue Ops[] = { N1, InFlag };
+ if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ InFlag = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 0);
+ }
}
// Prevent use of AH in a REX instruction by referencing AX instead.
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResLo.getNode() == 0) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
+ InFlag);
+ InFlag = ResLo.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 0), ResLo);
+ DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResHi.getNode() == 0) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
+ InFlag);
+ InFlag = ResHi.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 1), ResHi);
+ DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
MVT::i8, Reg);
// Emit a testb.
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
- return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
- Subreg, ShiftedImm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
+ MVT::i32, Subreg, ShiftedImm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
MVT::i16, Reg);
// Emit a testw.
- return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
MVT::i32, Reg);
// Emit a testl.
- return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
}
break;
return Result;
}
-
- // FIXME: Custom handling because TableGen doesn't support multiple implicit
- // defs in an instruction pattern
- case X86ISD::PCMPESTRI: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- SDValue N2 = Node->getOperand(2);
- SDValue N3 = Node->getOperand(3);
- SDValue N4 = Node->getOperand(4);
-
- // Make sure last argument is a constant
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
- if (!Cst)
- break;
-
- uint64_t Imm = Cst->getZExtValue();
-
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
- X86::EAX, N1, SDValue()).getValue(1);
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
- N3, InFlag).getValue(1);
-
- SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
- unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
- X86::PCMPESTRIrr;
- InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
- array_lengthof(Ops)), 0);
-
- if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::ECX, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- }
- if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::EFLAGS, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- }
-
- return NULL;
- }
-
- // FIXME: Custom handling because TableGen doesn't support multiple implicit
- // defs in an instruction pattern
- case X86ISD::PCMPISTRI: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- SDValue N2 = Node->getOperand(2);
-
- // Make sure last argument is a constant
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
- if (!Cst)
- break;
-
- uint64_t Imm = Cst->getZExtValue();
-
- SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
- unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
- X86::PCMPISTRIrr;
- SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
- array_lengthof(Ops)), 0);
-
- if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::ECX, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- }
- if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::EFLAGS, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- }
-
- return NULL;
- }
}
SDNode *ResNode = SelectCode(Node);