//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
-
+ setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom);
+
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
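+  // (Expand lowers these to calls to the powf / pow / powl libm routines.)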
+
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
}
if (Subtarget->hasMMX()) {
setOperationAction(ISD::SUB, MVT::v8i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i16, Legal);
setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v1i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
setOperationAction(ISD::MUL, MVT::v4i16, Legal);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
+ continue;
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
}
+/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+/// jumptable.
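+/// This is the GOT for GOT-style PIC, the global base register for other PIC
+/// styles, and the table address itself for RIP-relative code.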
+SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable())
+ return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
+ if (!Subtarget->isPICStyleRIPRel())
+ return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
+ return Table;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
- if (DAG.getMachineFunction().liveout_empty()) {
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
SDOperand Chain = Op.getOperand(0);
Operands.push_back(StackAdjustment);
// Copy registers used by the call. Last operand is a flag so it is not
// copied.
- for(unsigned i=3; i < TailCall.getNumOperands()-1;i++) {
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
Operands.push_back(Chain.getOperand(i));
}
- return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], Operands.size());
+ return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
+ Operands.size());
}
// Regular return.
CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
SmallVector<SDOperand, 8> ResultVals;
// Copy all of the result registers out of their specified physreg.
// an XMM register.
if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
(X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
+ SDOperand StoreLoc;
+ const Value *SrcVal = 0;
+ int SrcValOffset = 0;
+ MVT::ValueType RetStoreVT = RVLocs[0].getValVT();
+
+ // Determine where to store the value. If the call result is directly
+ // used by a store, see if we can store directly into the location. In
+ // this case, we'll end up producing a fst + movss[load] + movss[store] to
+ // the same location, and the two movss's will be nuked as dead. This
+ // optimizes common things like "*D = atof(..)" to not need an
+ // intermediate stack slot.
+ if (SDOperand(TheCall, 0).hasOneUse() &&
+ SDOperand(TheCall, 1).hasOneUse()) {
+ // In addition to direct uses, we also support a FP_ROUND that uses the
+ // value, if it is directly stored somewhere.
+ SDNode *User = *TheCall->use_begin();
+ if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
+ User = *User->use_begin();
+
+ // Ok, we have one use of the value and one use of the chain. See if
+ // they are the same node: a store.
+ if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
+ // Verify that the value being stored is either the call or a
+ // truncation of the call.
+ SDNode *StoreVal = N->getValue().Val;
+ if (StoreVal == TheCall)
+ ; // ok.
+ else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
+ StoreVal->hasOneUse() &&
+ StoreVal->getOperand(0).Val == TheCall)
+ ; // ok.
+ else
+ N = 0; // not ok.
+
+ if (N && N->getChain().Val == TheCall &&
+ !N->isVolatile() && !N->isTruncatingStore() &&
+ N->getAddressingMode() == ISD::UNINDEXED) {
+ StoreLoc = N->getBasePtr();
+ SrcVal = N->getSrcValue();
+ SrcValOffset = N->getSrcValueOffset();
+ RetStoreVT = N->getValue().getValueType();
+ }
+ }
+ }
+
+ // If we weren't able to optimize the result, just create a temporary
+ // stack slot.
+ if (StoreLoc.Val == 0) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
// shouldn't be necessary except that RFP cannot be live across
- // multiple blocks. When stackifier is fixed, they can be uncoupled.
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
- SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ // multiple blocks (which could happen if a select gets lowered into
+ // multiple blocks and scheduled in between them). When stackifier is
+ // fixed, they can be uncoupled.
SDOperand Ops[] = {
- Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
+ Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
};
Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
- RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
+ RetVal = DAG.getLoad(RetStoreVT, Chain,
+ StoreLoc, SrcVal, SrcValOffset);
Chain = RetVal.getValue(1);
+
+ // If we optimized a truncate, then extend the result back to its desired
+ // type.
+ if (RVLocs[0].getValVT() != RetStoreVT)
+ RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
}
ResultVals.push_back(RetVal);
}
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
const TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
- MF.addLiveIn(PReg, VReg);
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
}
-// align stack arguments according to platform alignment needed for tail calls
-unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);
-
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
bool isStdCall) {
- unsigned NumArgs = Op.Val->getNumValues() - 1;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
unsigned CC = MF.getFunction()->getCallingConv();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg,
- getTargetMachine(), ArgLocs);
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
// Check for possible tail call calling convention.
if (CC == CallingConv::Fast && PerformTailCallOpt)
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
assert(MVT::isVector(RegVT));
RC = X86::VR128RegisterClass;
}
-
+
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
-
+
unsigned StackSize = CCInfo.getNextStackOffset();
// align stack specially for tail calls
- if (CC==CallingConv::Fast)
- StackSize = GetAlignedArgumentStackSize(StackSize,DAG);
-
- ArgValues.push_back(Root);
+ if (CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg)
+ if (isVarArg) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ }
- // Tail call calling convention (CallingConv::Fast) does not support varargs.
- assert( !(isVarArg && CC == CallingConv::Fast) &&
- "CallingConv::Fast does not support varargs.");
+ ArgValues.push_back(Root);
+  // The callee pops its stack arguments for stdcall, and for fastcc when
+  // tail-call optimization is enabled.
  if (isStdCall && !isVarArg &&
      ((CC == CallingConv::Fast && PerformTailCallOpt) ||
       CC != CallingConv::Fast)) {
- BytesToPopOnReturn = StackSize; // Callee pops everything..
+ BytesToPopOnReturn = StackSize; // Callee pops everything..
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
+ unsigned NumArgs = Op.Val->getNumValues() - 1;
if (NumArgs &&
(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
ISD::ParamFlags::StructReturn))
BytesCallerReserves = StackSize;
}
-
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
+
+ RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}
-SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
+SDOperand
+X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
+ unsigned CC) {
SDOperand Chain = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
- unsigned NumOps = (Op.getNumOperands() - 5) / 2;
-
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if(CC==CallingConv::Fast && PerformTailCallOpt)
+  if (CC == CallingConv::Fast && PerformTailCallOpt)
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
else
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- if (CC==CallingConv::Fast)
+ if (CC == CallingConv::Fast)
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
}
// If the first argument is an sret pointer, remember it.
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
bool isSRet = NumOps &&
(cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
ISD::ParamFlags::StructReturn);
InFlag);
InFlag = Chain.getValue(1);
}
-
+
  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
InFlag);
InFlag = Chain.getValue(1);
}
-
+
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
+ }
+
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
+
if (InFlag.Val)
Ops.push_back(InFlag);
-
+
Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush = 0;
-
- if (CC == CallingConv::X86_StdCall ||
- (CC == CallingConv::Fast && PerformTailCallOpt)) {
- if (isVarArg)
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
- else
- NumBytesForCalleeToPush = NumBytes;
- assert(!(isVarArg && CC==CallingConv::Fast) &&
- "CallingConv::Fast does not support varargs.");
- } else {
+  if (!isVarArg && (CC == CallingConv::X86_StdCall ||
+                    (CC == CallingConv::Fast && PerformTailCallOpt))) {
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ } else if (isSRet) {
    // If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
+ NumBytesForCalleeToPush = 4;
+ } else {
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
}
-
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+
+ // Returns a flag for retval copy to use.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy()),
+ DAG.getConstant(NumBytesForCalleeToPush,
+ getPointerTy()),
+ InFlag);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
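+// (Popping the 8n+4 argument bytes plus the 4-byte return address restores
+// the stack pointer to a multiple of the 8-byte alignment.)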
+
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
- getTargetMachine(), ArgLocs);
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
SmallVector<SDOperand, 8> ArgValues;
assert(MVT::isVector(RegVT));
RC = X86::VR128RegisterClass;
}
-
+
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
-
- ArgValues.push_back(Root);
unsigned StackSize = CCInfo.getNextStackOffset();
StackSize += 4;
}
- VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ ArgValues.push_back(Root);
+
RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
+ VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
BytesCallerReserves = 0;
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+ SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
- return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
- AlignNode);
+ return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
+ AlwaysInline);
} else {
return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
}
}
-SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
+SDOperand
+X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
+ unsigned CC) {
SDOperand Chain = Op.getOperand(0);
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
+ assert(!cast<ConstantSDNode>(Op.getOperand(3))->getValue() &&
+ "Tail calls should not reach here.");
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
-
if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
// Make sure the instruction takes 8n+4 bytes to make sure the start of the
// arguments and the arguments after the retaddr has been pushed are
}
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
+
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
-
+
SDOperand StackPtr;
-
+
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
}
if (VA.isRegLoc()) {
if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
+ }
+
  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
InFlag);
InFlag = Chain.getValue(1);
}
-
+
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
if (InFlag.Val)
Ops.push_back(InFlag);
- assert(isTailCall==false && "no tail call here");
- Chain = DAG.getNode(X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush = NumBytes;
+
// Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy()),
+ DAG.getConstant(NumBytesForCalleeToPush,
+ getPointerTy()),
+ InFlag);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// * elf/pic is disabled OR
// * elf/pic enabled + callee is in module + callee has
// visibility protected or hidden
-// To ensure the stack is aligned according to platform abi pass
-// tail-call-align-stack. This makes sure that argument delta is always
-// multiples of stack alignment. (Dynamic linkers need this - darwin's dyld for
-// example)
+// To keep the stack aligned according to the platform ABI, the function
+// GetAlignedArgumentStackSize ensures that the argument delta is always a
+// multiple of the stack alignment. (Dynamic linkers need this - darwin's
+// dyld for example)
// If a tail-called callee has more arguments than the caller, the caller
// needs to make sure that there is room to move the RETADDR to. This is
-// achived by reserving an area the size of the argument delta right after the
+// achieved by reserving an area the size of the argument delta right after the
// original RETADDR, but before the saved framepointer or the spilled registers
// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
// stack layout:
}
/// IsEligibleForTailCallOptimization - Check to see whether the next instruction
-// following the call is a return. A function is eligible if caller/callee
-// calling conventions match, currently only fastcc supports tail calls, and the
-// function CALL is immediatly followed by a RET.
+/// following the call is a return. A function is eligible if caller/callee
+/// calling conventions match, currently only fastcc supports tail calls, and
+/// the function CALL is immediately followed by a RET.
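+/// On ELF/PIC targets only "local" tail calls are eligible: the callee must
+/// have hidden or protected visibility, since %ebx (the GOT pointer) is not
+/// restored after the jump. An eligible pattern in the IR looks like:
+///   %res = tail call fastcc i32 @callee( i32 %x )
+///   ret i32 %res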
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
SDOperand Ret,
SelectionDAG& DAG) const {
- bool IsEligible = false;
+ if (!PerformTailCallOpt)
+ return false;
  // Check whether CALL node immediately precedes the RET node and whether the
// return uses the result of the node or is a void return.
- if ((Ret.getNumOperands() == 1 &&
- (Ret.getOperand(0)== SDOperand(Call.Val,1) ||
- Ret.getOperand(0)== SDOperand(Call.Val,0))) ||
- (Ret.getOperand(0)== SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
- Ret.getOperand(1)== SDOperand(Call.Val,0))) {
+ unsigned NumOps = Ret.getNumOperands();
+ if ((NumOps == 1 &&
+ (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
+ Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
+ (NumOps > 1 &&
+ Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
+ Ret.getOperand(1) == SDOperand(Call.Val,0))) {
MachineFunction &MF = DAG.getMachineFunction();
unsigned CallerCC = MF.getFunction()->getCallingConv();
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
SDOperand Callee = Call.getOperand(4);
// On elf/pic %ebx needs to be livein.
- if(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- // Can only do local tail calls with PIC.
- GlobalValue * GV = 0;
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if(G != 0 &&
- (GV = G->getGlobal()) &&
- (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
- IsEligible=true;
- } else {
- IsEligible=true;
- }
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+ !Subtarget->isPICStyleGOT())
+ return true;
+
+ // Can only do local tail calls with PIC.
+    GlobalValue *GV = 0;
+    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+    if (G != 0 &&
+ (GV = G->getGlobal()) &&
+ (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
+ return true;
}
}
- return IsEligible;
+
+ return false;
}
-SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
- SelectionDAG &DAG,
- unsigned CC) {
+SDOperand
+X86TargetLowering::LowerX86_TailCallTo(SDOperand Op, SelectionDAG &DAG,
+ unsigned CC) {
SDOperand Chain = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
bool is64Bit = Subtarget->is64Bit();
- assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");
+  assert(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &&
+         PerformTailCallOpt && "Should only emit tail calls.");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
else
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
-
// Lower arguments at fp - stackoffset + fpdiff.
MachineFunction &MF = DAG.getMachineFunction();
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
- // Adjust the ret address stack slot.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed,
+                                                      getPointerTy()));
+
+  // Adjust the return address stack slot.
+ SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
if (FPDiff) {
MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
- SDOperand RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
+ RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
+ // Load the "old" Return address.
RetAddrFrIdx =
- DAG.getLoad(VT, DAG.getEntryNode(),RetAddrFrIdx, NULL, 0);
- // Emit a store of the saved ret value to the new location.
+      DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
+ // Calculate the new stack slot for the return address.
int SlotSize = is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
- Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
+ NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = SDOperand(RetAddrFrIdx.Val, 1);
}
- Chain = DAG.
- getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed, getPointerTy()));
-
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
SmallVector<SDOperand, 8> MemOpChains2;
InFlag = Chain.getValue(1);
}
InFlag = SDOperand();
+
+  // Copy from the incoming stack slots to the stack slots of the tail called
+  // function. This needs to be done because if we lowered the arguments
+  // directly to their real stack slots, arguments could overwrite each other.
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
- // Copy relative to framepointer.
- MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
- PtrOff, SizeNode, AlignNode));
+        SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
+
+        MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
+                                             AlignNode, AlwaysInline));
} else {
SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0);
// Store relative to framepointer.
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains2[0], MemOpChains.size());
+ // Store the return address to the appropriate stack slot.
+ if (FPDiff)
+      Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
+
  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
// Does not work with tail call since ebx is not restored correctly by
Callee,InFlag);
Callee = DAG.getRegister(Opc, getPointerTy());
// Add register as live out.
- DAG.getMachineFunction().addLiveOut(Opc);
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
}
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.push_back(InFlag);
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
-
+
// Returns a chain & a flag for retval copy to use.
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.clear();
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- unsigned CC= MF.getFunction()->getCallingConv();
+ unsigned CC = MF.getFunction()->getCallingConv();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
static const unsigned GPR64ArgRegs[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
-
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg,
- getTargetMachine(), ArgLocs);
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ // Check for possible tail call calling convention.
if (CC == CallingConv::Fast && PerformTailCallOpt)
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
else
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
-
+
unsigned StackSize = CCInfo.getNextStackOffset();
- if (CC==CallingConv::Fast)
- StackSize =GetAlignedArgumentStackSize(StackSize, DAG);
-
+ // align stack specially for tail calls
+ if (CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- assert(CC!=CallingConv::Fast
- && "Var arg not supported with calling convention fastcc");
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
}
ArgValues.push_back(Root);
+
// Tail call convention (fastcc) needs callee pop.
- if (CC == CallingConv::Fast && PerformTailCallOpt){
- BytesToPopOnReturn = StackSize; // Callee pops everything.
+ if (CC == CallingConv::Fast && PerformTailCallOpt) {
+ BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
- BytesToPopOnReturn = 0; // Callee pops nothing.
+ BytesToPopOnReturn = 0; // Callee pops nothing.
BytesCallerReserves = StackSize;
}
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SDOperand Chain = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
-
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if (CC==CallingConv::Fast)
+  if (CC == CallingConv::Fast && PerformTailCallOpt)
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
else
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
-
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (CC == CallingConv::Fast)
- NumBytes = GetAlignedArgumentStackSize(NumBytes,DAG);
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
-
+
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
}
if (isVarArg) {
- assert ( CallingConv::Fast != CC &&
- "Var args not supported with calling convention fastcc");
-
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() != CodeModel::Large)
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
+ }
+
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
if (InFlag.Val)
Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
- int NumBytesForCalleeToPush = 0;
- if (CC==CallingConv::Fast) {
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush = 0;
+ if (CC == CallingConv::Fast && PerformTailCallOpt) {
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
-
} else {
NumBytesForCalleeToPush = 0; // Callee pops nothing.
}
+
// Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy()),
+ DAG.getConstant(NumBytesForCalleeToPush,
+ getPointerTy()),
+ InFlag);
InFlag = Chain.getValue(1);
-
+
// Handle result values, copying them out of physregs into vregs that we
// return.
return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ if (NumElts != 2 && NumElts != 4)
return false;
if (!isUndefOrEqual(Elts[0], NumElts))
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
- if (Val > 4)
+ if (Val >= 4)
return false;
}
return true;
}
-/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in their permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
SDOperand &V2, SDOperand &Mask,
}
std::swap(V1, V2);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
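+/// For example, with four elements the mask <4,1,2,3> becomes <0,5,6,7>.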
+static
+SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = Mask.getValueType();
+ MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+ continue;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < NumElems)
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
+ else
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
+}
+
+
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
- if (Arg.getOpcode() != ISD::UNDEF) {
- unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
- if (Idx < NumElems) {
- unsigned Opc = V1.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V1.Val->getOperand(Idx)))
- return false;
- } else if (Idx >= NumElems) {
- unsigned Opc = V2.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
- return false;
- }
+ if (Arg.getOpcode() == ISD::UNDEF)
+ continue;
+
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+ if (Idx < NumElems) {
+ unsigned Opc = V1.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V1.Val->getOperand(Idx)))
+ return false;
+ } else if (Idx >= NumElems) {
+ unsigned Opc = V2.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
+ return false;
}
}
return true;
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
- unsigned NumElems = MVT::getVectorNumElements(VT);
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
- bool isFP = MVT::isFloatingPoint(EVT);
- SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
- return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
+
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
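+/// These are matched to a PCMPEQD of a register against itself, so no
+/// constant-pool load is needed (see the all-ones handling in
+/// LowerBUILD_VECTOR).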
+static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
+ assert(MVT::isVector(VT) && "Expected a vector type");
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
+
/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- Mask = getZeroVector(MaskVT, DAG);
+ Mask = getZeroVector(MVT::v4i32, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector.
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
- SDOperand Zero = DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
- MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+ SmallVector<SDOperand, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(DAG.getConstant(NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
- // All zero's are handled with pxor.
- if (ISD::isBuildVectorAllZeros(Op.Val))
- return Op;
+ // All zero's are handled with pxor, all one's are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
+ // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ return Op;
- // All one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllOnes(Op.Val))
- return Op;
+ if (ISD::isBuildVectorAllOnes(Op.Val))
+ return getOnesVector(Op.getValueType(), DAG);
+ return getZeroVector(Op.getValueType(), DAG);
+ }
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
- unsigned NumNonZeroImms = 0;
- std::set<SDOperand> Values;
+ bool HasNonImms = false;
+ SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
- if (Elt.getOpcode() != ISD::UNDEF) {
- Values.insert(Elt);
- if (isZeroNode(Elt))
- NumZero++;
- else {
- NonZeros |= (1 << i);
- NumNonZero++;
- if (Elt.getOpcode() == ISD::Constant ||
- Elt.getOpcode() == ISD::ConstantFP)
- NumNonZeroImms++;
- }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ HasNonImms = true;
+ if (isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
}
}
if (NumNonZero == 0) {
- if (NumZero == 0)
- // All undef vector. Return an UNDEF.
- return DAG.getNode(ISD::UNDEF, VT);
- else
- // A mix of zero and undef. Return a zero vector.
- return getZeroVector(VT, DAG);
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
return SDOperand();
// Special case for single non-zero element.
- if (NumNonZero == 1) {
+ if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
+ else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
+ return SDOperand();
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
- if (NumNonZero == NumNonZeroImms)
+ if (!HasNonImms)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
return SDOperand();
}
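+
+/// LowerVECTOR_SHUFFLEv8i16 - Lower a v8i16 shuffle: when most of the low or
+/// high elements come from a single quadword of one input, move that quadword
+/// into place with a v2i64 shuffle, reorder it with PSHUFLW / PSHUFHW, and fix
+/// up any stragglers (and the fully general case) with PEXTRW / PINSRW.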
+static
+SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ SDOperand NewV;
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
+ MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
+ PermMask.Val->op_end());
+
+ // First record which half of which vector the low elements come from.
+ SmallVector<unsigned, 4> LowQuad(4);
+ for (unsigned i = 0; i < 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++LowQuad[QuadIdx];
+ }
+ int BestLowQuad = -1;
+ unsigned MaxQuad = 1;
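+  // (MaxQuad starts at 1, so a quadword is picked only if it supplies at
+  // least two of the four low elements.)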
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LowQuad[i] > MaxQuad) {
+ BestLowQuad = i;
+ MaxQuad = LowQuad[i];
+ }
+ }
+
+ // Record which half of which vector the high elements come from.
+ SmallVector<unsigned, 4> HighQuad(4);
+ for (unsigned i = 4; i < 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++HighQuad[QuadIdx];
+ }
+ int BestHighQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HighQuad[i] > MaxQuad) {
+ BestHighQuad = i;
+ MaxQuad = HighQuad[i];
+ }
+ }
+
+ // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
+ if (BestLowQuad != -1 || BestHighQuad != -1) {
+ // First sort the 4 chunks in order using shufpd.
+ SmallVector<SDOperand, 8> MaskVec;
+ if (BestLowQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+ if (BestHighQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32,
+                                 &MaskVec[0], 2);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
+ NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
+
+ // Now sort high and low parts separately.
+ BitVector InOrder(8);
+ if (BestLowQuad != -1) {
+ // Sort lower half in order using PSHUFLW.
+ MaskVec.clear();
+ bool AnyOutOrder = false;
+ for (unsigned i = 0; i != 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestLowQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ if (BestHighQuad != -1) {
+ // Sort high half in order using PSHUFHW if possible.
+ MaskVec.clear();
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ bool AnyOutOrder = false;
+ for (unsigned i = 4; i != 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestHighQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ SDOperand Elt = MaskElts[i];
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i)
+ continue;
+ SDOperand ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
+ }
+
+ // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
+  // as few as possible.
+ // First, let's find out how many elements are already in the right order.
+ unsigned V1InOrder = 0;
+ unsigned V1FromV1 = 0;
+ unsigned V2InOrder = 0;
+ unsigned V2FromV2 = 0;
+ SmallVector<SDOperand, 8> V1Elts;
+ SmallVector<SDOperand, 8> V2Elts;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(Elt);
+ ++V1InOrder;
+ ++V2InOrder;
+ continue;
+ }
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx == i) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
+ ++V1InOrder;
+ } else if (EltIdx == i+8) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i, MaskEVT));
+ ++V2InOrder;
+ } else if (EltIdx < 8) {
+ V1Elts.push_back(Elt);
+ ++V1FromV1;
+ } else {
+ V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
+ ++V2FromV2;
+ }
+ }
+
+ if (V2InOrder > V1InOrder) {
+ PermMask = CommuteVectorShuffleMask(PermMask, DAG);
+ std::swap(V1, V2);
+ std::swap(V1Elts, V2Elts);
+ std::swap(V1FromV1, V2FromV2);
+ }
+
+ if ((V1FromV1 + V1InOrder) != 8) {
+ // Some elements are from V2.
+ if (V1FromV1) {
+ // If there are elements that are from V1 but out of place,
+ // then first sort them in place
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ continue;
+ }
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx >= 8)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ else
+ MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ }
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ }
+
+ NewV = V1;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (EltIdx < 8)
+ continue;
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
+ } else {
+ // All elements are from V1.
+ NewV = V1;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
+ }
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements points to an aligned
+/// run of elements in the right sequence, e.g.
+/// vector_shuffle <>, <>, < 2, 3, | 10, 11, | 0, 1, | 14, 15>
+/// which becomes the v4i32 shuffle < 1, 5, 0, 7 >. (An unaligned pair such
+/// as < 3, 4, ... > would not qualify.)
+static
+SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
+ MVT::ValueType VT,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ unsigned NumElems = PermMask.getNumOperands();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT::ValueType NewVT = MaskVT;
+ switch (VT) {
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ default: assert(false && "Unexpected!");
+ }
+
+ if (NewWidth == 2)
+ if (MVT::isInteger(VT))
+ NewVT = MVT::v2i64;
+ else
+ NewVT = MVT::v2f64;
+ unsigned Scale = NumElems / NewWidth;
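+  // Each wide element of the new shuffle covers Scale consecutive elements
+  // of the original mask.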
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ unsigned StartIdx = ~0U;
+ for (unsigned j = 0; j < Scale; ++j) {
+ SDOperand Elt = PermMask.getOperand(i+j);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (StartIdx == ~0U)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDOperand();
+ }
+ if (StartIdx == ~0U)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
+ }
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size()));
+}
+
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
return PromoteSplat(Op, DAG);
}
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+    SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+                                               DAG, *this);
+    if (NewOp.Val)
+      return DAG.getNode(ISD::BIT_CONVERT, VT,
+                         LowerVECTOR_SHUFFLE(NewOp, DAG));
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.Val)) {
+ SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val) {
+ SDOperand NewV1 = NewOp.getOperand(0);
+ SDOperand NewV2 = NewOp.getOperand(1);
+ SDOperand NewMask = NewOp.getOperand(2);
+ if (isCommutedMOVL(NewMask.Val, true, false)) {
+ NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+ NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
+ NewV1, NewV2, getMOVLMask(2, DAG));
+          return DAG.getNode(ISD::BIT_CONVERT, VT,
+                             LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
+      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
+                                                 DAG, *this);
+      if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
+        return DAG.getNode(ISD::BIT_CONVERT, VT,
+                           LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
+  // FIXME: This should also accept a bitcast of a splat? Be careful, though:
+  // a v4i32 splat of 1,1,1,1 bitcast to v8i16 is not a v8i16 splat.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
Commuted = true;
}
+ // FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
-
- // Handle v8i16 shuffle high / low shuffle node pair.
- if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size());
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- MaskVec.clear();
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- }
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
}
}
- if (NumElems == 4 &&
- // Don't do this for MMX.
- MVT::getSizeInBits(VT) != 64) {
+  // Handle v8i16 specifically since SSE can do word (16-bit) extraction and
+  // insertion.
+ if (VT == MVT::v8i16) {
+ SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return NewOp;
+ }
+
+  // Handle all 4 wide cases with a number of shuffles, but skip MMX
+  // (64-bit vectors).
+  if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
- SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
- SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask1(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask2(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
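+    // If the index is 0, grab the low 32 bits with a movd-style extract
+    // (bitcast to v4i32, extract element 0) and truncate to i16.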
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
+ Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
- // Transform it so it match pinsrw which expects a 16-bit value in a GR32
- // as its second argument.
MVT::ValueType VT = Op.getValueType();
- MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+ if (EVT == MVT::i8)
+ return SDOperand();
+
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
- if (MVT::getSizeInBits(BaseVT) == 16) {
+
+ if (MVT::getSizeInBits(EVT) == 16) {
+    // Transform it so it matches pinsrw, which expects a 16-bit value in a
+    // GR32 as its second argument.
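+    // e.g. inserting into element 3 would become: pinsrw $3, %eax, %xmm0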
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),
                           getPointerTy());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
- } else if (MVT::getSizeInBits(BaseVT) == 32) {
- unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
- if (Idx == 0) {
- // Use a movss.
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- MaskVec.push_back(DAG.getConstant(4, BaseVT));
- for (unsigned i = 1; i <= 3; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
- DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size()));
- } else {
- // Use two pinsrw instructions to insert a 32 bit value.
- Idx <<= 1;
- if (MVT::isFloatingPoint(N1.getValueType())) {
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
- N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
- N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
- DAG.getConstant(0, getPointerTy()));
- }
- N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
- N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
- DAG.getConstant(Idx, getPointerTy()));
- N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
- N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
- DAG.getConstant(Idx+1, getPointerTy()));
- return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
- }
}
- return SDOperand();
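+  // Otherwise, SCALAR_TO_VECTOR the value and blend it into N0 with a
+  // 4-wide shuffle.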
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
+ unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+ MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 4> MaskVec;
+  // Select the scalar (element 0 of N1, i.e. concatenated index 4) for slot
+  // Idx and keep N0's element everywhere else.
+  for (unsigned i = 0; i < 4; ++i)
+    MaskVec.push_back(DAG.getConstant((i == Idx) ? 4 : i, MaskEVT));
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size()));
}
SDOperand
return Result;
}
+/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- "Not an i64 shift!");
- bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
- SDOperand ShOpLo = Op.getOperand(0);
- SDOperand ShOpHi = Op.getOperand(1);
- SDOperand ShAmt = Op.getOperand(2);
- SDOperand Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
- DAG.getConstant(0, MVT::i32);
-
- SDOperand Tmp2, Tmp3;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
- } else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
- }
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ "Not an i64 shift!");
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDOperand ShOpLo = Op.getOperand(0);
+ SDOperand ShOpHi = Op.getOperand(1);
+ SDOperand ShAmt = Op.getOperand(2);
+ SDOperand Tmp1 = isSRA ?
+ DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
+ DAG.getConstant(0, MVT::i32);
+
+ SDOperand Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
+ }
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
- SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
- DAG.getConstant(32, MVT::i8));
- SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
- AndNode, DAG.getConstant(0, MVT::i8));
-
- SDOperand Hi, Lo;
- SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- unsigned Opc = X86ISD::CMOV;
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
- SmallVector<SDOperand, 4> Ops;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
- } else {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
- }
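+  // Test whether the shift amount is 32 or more; if it is, the SHLD/SHRD
+  // and plain-shift results computed above swap roles, so select with CMOVs
+  // on that condition.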
+ SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
+ DAG.getConstant(32, MVT::i8));
+ SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDOperand Hi, Lo;
+ SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SmallVector<SDOperand, 4> Ops;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+
+ Ops.clear();
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ } else {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
- Ops.push_back(Lo);
- Ops.push_back(Hi);
- return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ }
+
+  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
+ Ops.clear();
+ Ops.push_back(Lo);
+ Ops.push_back(Hi);
+ return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
return Result;
}
-SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+std::pair<SDOperand,SDOperand> X86TargetLowering::
+FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
- // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
- // stack slot.
- SDOperand Result;
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
- SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
- return Result;
+ return std::make_pair(SDOperand(), SDOperand());
if (Op.getValueType() == MVT::i32 &&
X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
- return Result;
+ return std::make_pair(SDOperand(), SDOperand());
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
Op.getOperand(0).getValueType() != MVT::f80)
- return Result;
+ return std::make_pair(SDOperand(), SDOperand());
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
switch (Op.getValueType()) {
- default: assert(0 && "Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDOperand Chain = DAG.getEntryNode();
SDOperand Ops[] = { Chain, Value, StackSlot };
SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG);
+ SDOperand FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.Val == 0) return SDOperand();
+
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}
+SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
+ std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG);
+ SDOperand FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.Val == 0) return 0;
+
+ // Return an i64 load from the stack slot.
+ SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0);
+
+ // Use a MERGE_VALUES node to drop the chain result value.
+ return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val;
+}
+
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EltVT = VT;
SrcVT = VT;
SrcTy = MVT::getTypeForValueType(SrcVT);
}
+  // And if the source is wider than the destination, shrink it first.
+ if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1);
+ SrcVT = VT;
+ SrcTy = MVT::getTypeForValueType(SrcVT);
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
  // First get the sign bit of the second operand.
std::vector<Constant*> CV;
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
else if (VT == MVT::f64 && !X86ScalarSSEf64)
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+ else if (VT == MVT::f80)
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
default:
assert(0 && "Unsupported calling convention");
case CallingConv::Fast:
- return LowerCCCArguments(Op,DAG, true);
- // Falls through
+ return LowerCCCArguments(Op, DAG, true);
case CallingConv::C:
return LowerCCCArguments(Op, DAG);
case CallingConv::X86_StdCall:
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
- SDOperand ChainOp = Op.getOperand(0);
- SDOperand DestOp = Op.getOperand(1);
- SDOperand SourceOp = Op.getOperand(2);
- SDOperand CountOp = Op.getOperand(3);
- SDOperand AlignOp = Op.getOperand(4);
- unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
- if (Align == 0) Align = 1;
-
- // The libc version is likely to be faster for the following cases. It can
- // use the address value and run time information about the CPU.
- // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
-
- // If not DWORD aligned, call memcpy.
- if ((Align & 3) != 0)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is unknown, call memcpy.
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
- if (!I)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is more than the threshold, call memcpy.
- unsigned Size = I->getValue();
- if (Size > Subtarget->getMinRepStrSizeThreshold())
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
-}
-
-SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- SDOperand Count,
- SelectionDAG &DAG) {
- MVT::ValueType IntPtr = getPointerTy();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = getTargetData()->getIntPtrType();
- Entry.Node = Dest; Args.push_back(Entry);
- Entry.Node = Source; Args.push_back(Entry);
- Entry.Node = Count; Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
- DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
- return CallResult.second;
-}
-
SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
SDOperand Dest,
SDOperand Source,
return Chain;
}
-SDOperand
-X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
+/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
+SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDOperand TheOp = Op.getOperand(0);
- SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
+ SDOperand TheChain = N->getOperand(0);
+ SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
if (Subtarget->is64Bit()) {
- SDOperand Copy1 =
- DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
- SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
- MVT::i64, Copy1.getValue(2));
- SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
+ SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
+ SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX,
+ MVT::i64, rax.getValue(2));
+ SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
DAG.getConstant(32, MVT::i8));
SDOperand Ops[] = {
- DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
+ DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
};
Tys = DAG.getVTList(MVT::i64, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}
- SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
- SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
- MVT::i32, Copy1.getValue(2));
- SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
- Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
+ SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
+ SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX,
+ MVT::i32, eax.getValue(2));
+  // Use a BUILD_PAIR to merge the two 32-bit values into a 64-bit one.
+ SDOperand Ops[] = { eax, edx };
+ Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);
+
+ // Use a MERGE_VALUES to return the value and chain.
+ Ops[1] = edx.getValue(1);
+ Tys = DAG.getVTList(MVT::i64, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
- MF.addLiveOut(X86::ECX);
+ MF.getRegInfo().addLiveOut(X86::ECX);
return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
Chain, DAG.getRegister(X86::ECX, getPointerTy()));
default:
assert(0 && "Unsupported calling convention");
case CallingConv::C:
- case CallingConv::Fast:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
// Check that ECX wasn't needed by an 'inreg' parameter.
const FunctionType *FTy = Func->getFunctionType();
- const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ const ParamAttrsList *Attrs = Func->getParamAttrs();
if (Attrs && !Func->isVarArg()) {
unsigned InRegCount = 0;
Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
- unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg);
+ unsigned char N86Reg = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
}
}
+SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
+ /*
+    The rounding mode is in bits 11:10 of the x87 FP control word (FPCW),
+    and has the following settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+     (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
+ */
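+  // For example, RC = 10 (round to +inf): bit 11 is set and bit 10 clear,
+  // so the bit-11 term is 1, the bit-10 term is 0, and (1 | 0) + 1 = 2,
+  // which is FLT_ROUNDS' encoding for round to +inf.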
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ MVT::ValueType VT = Op.getValueType();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
+ DAG.getEntryNode(), StackSlot);
+
+ // Load FP Control Word from stack slot
+ SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
+
+ // Transform as necessary
+ SDOperand CWD1 =
+ DAG.getNode(ISD::SRL, MVT::i16,
+ DAG.getNode(ISD::AND, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDOperand CWD2 =
+ DAG.getNode(ISD::SRL, MVT::i16,
+ DAG.getNode(ISD::AND, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDOperand RetVal =
+ DAG.getNode(ISD::AND, MVT::i16,
+ DAG.getNode(ISD::ADD, MVT::i16,
+ DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+ return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
+}
+
+SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType OpVT = VT;
+ unsigned NumBits = MVT::getSizeInBits(VT);
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+    // Zero extend to i32 since there is no i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, VTs, Op);
+
+  // If src is zero (i.e. bsr sets ZF), CMOV in 2*NumBits-1 so that the
+  // XOR with NumBits-1 below yields NumBits.
+ SmallVector<SDOperand, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
+
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
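+  // e.g. for an i32 input of 0x00010000, bsr returns 16 and 16 ^ 31 = 15,
+  // the number of leading zeros.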
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
+ return Op;
+}
+
+SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType OpVT = VT;
+ unsigned NumBits = MVT::getSizeInBits(VT);
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
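+    // Zero extend to i32 since there is no i8 bsf.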
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
+ }
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, VTs, Op);
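+  // e.g. for an input of 0x8, bsf returns 3, the number of trailing zeros.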
+
+  // If src is zero (i.e. bsf sets ZF), CMOV in NumBits as the result.
+ SmallVector<SDOperand, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
+ return Op;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::MEMSET: return LowerMEMSET(Op, DAG);
case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
- case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+
+ // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
+ case ISD::READCYCLECOUNTER:
+ return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0);
+ }
+}
+
+/// ExpandOperationResult - Provide custom lowering hooks for expanding
+/// operations.
+SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+  default: assert(0 && "Should not custom expand this!");
+ case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
+ case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
}
- return SDOperand();
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
}
}
}
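+// Truncating an integer is free on x86: the narrower value is just the low
+// bits (a subregister) of the wider register. Sources of 64 bits or more
+// only qualify in 64-bit mode, where 64-bit registers exist.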
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
+ MVT::ValueType VT2) const {
+ if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2))
+ return false;
+ unsigned NumBits1 = MVT::getSizeInBits(VT1);
+ unsigned NumBits2 = MVT::getSizeInBits(VT2);
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
// Load the old value of the high byte of the control word...
unsigned OldCW =
- F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
// Set the high part to be round to zero...
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
- AM.Base.FrameIndex = Op.getFrameIndex();
+ AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
if (Op.isImmediate())
return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
else if (VT == MVT::i8)
return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
- break;
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+ break;
}
}