#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
+#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#ifndef NDEBUG
if (retval == 0) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "getValueTypeMapEntry returns NULL for "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ report_fatal_error("getValueTypeMapEntry returns NULL for " +
+ Twine(VT.getEVTString()));
}
#endif
SDValue
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
- // "Odd size" vector classes that we're willing to support:
- addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
-
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
// Set pre-RA register scheduler default to BURR, which produces slightly
// better code than the default (could also be TDRR, but TargetLowering.h
// needs a mod to support that model):
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
}
const char *
// Re-emit as a v16i8 vector load
result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->getPointerInfo(),
LN->isVolatile(), LN->isNonTemporal(), 16);
// Update the chain
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
{
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
- "UNINDEXED\n";
- Msg << (unsigned) LN->getAddressingMode();
- llvm_report_error(Msg.str());
+ report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
+ "than UNINDEXED\n" +
+ Twine((unsigned)LN->getAddressingMode()));
/*NOTREACHED*/
}
}
if (alignment == 16) {
ConstantSDNode *CN;
-
// Special cases for a known aligned load to simplify the base pointer
// and insertion byte:
if (basePtr.getOpcode() == ISD::ADD
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
}
} else {
// Unaligned load: must be more pessimistic about addressing modes:
DAG.getConstant(0, PtrVT));
}
- // Re-emit as a v16i8 vector load
- alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
- SN->getSrcValue(), SN->getSrcValueOffset(),
+ // Load the memory to which to store.
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ SN->getPointerInfo(),
SN->isVolatile(), SN->isNonTemporal(), 16);
// Update the chain
}
#endif
- SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- SDValue vectorizeOp =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
vectorizeOp, alignLoadVec,
MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, dl, result, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->getPointerInfo(),
LN->isVolatile(), LN->isNonTemporal(),
LN->getAlignment());
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
{
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
- "UNINDEXED\n";
- Msg << (unsigned) SN->getAddressingMode();
- llvm_report_error(Msg.str());
+ report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
+ "than UNINDEXED\n" +
+ Twine((unsigned)SN->getAddressingMode()));
/*NOTREACHED*/
}
}
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- Constant *C = CP->getConstVal();
+ const Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there is no actual debug info here
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
} else {
- llvm_report_error("LowerGlobalAddress: Relocation model other than static"
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
"not supported.");
/*NOTREACHED*/
}
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
unsigned ArgOffset = SPUFrameInfo::minStackSize();
unsigned ArgRegIdx = 0;
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
// Add DAG nodes to load the arguments or copy them out of registers.
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
- if (ArgRegIdx < NumArgRegs) {
+ if (VA.isRegLoc()) {
const TargetRegisterClass *ArgRegClass;
switch (ObjectVT.getSimpleVT().SimpleTy) {
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerFormalArguments Unhandled argument type: "
- << ObjectVT.getEVTString();
- llvm_report_error(Msg.str());
- }
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
case MVT::i8:
ArgRegClass = &SPU::R8CRegClass;
break;
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++ArgRegIdx;
} else {
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
ArgOffset += StackSlotSize;
}
// vararg handling:
if (isVarArg) {
- // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const unsigned ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+ // size of ArgRegs array
+ unsigned NumArgRegs = 77;
+
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
// Create the frame slot
-
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
- true, false);
- SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
false, false, 0);
Chain = Store.getOperand(0);
MemOps.push_back(Store);
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
// CellSPU target does not yet support tail call optimization.
isTailCall = false;
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
// Handy pointer type
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// And the arguments passed on the stack
SmallVector<SDValue, 8> MemOpChains;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
case MVT::i32:
case MVT::i64:
case MVT::i128:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
case MVT::f32:
case MVT::f64:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
case MVT::v2i64:
case MVT::v2f64:
case MVT::v4f32:
case MVT::v8i16:
case MVT::v16i8:
if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
ArgOffset += StackSlotSize;
}
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- GlobalValue *GV = G->getGlobal();
+ const GlobalValue *GV = G->getGlobal();
EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
if (!ST->usingLargeMem()) {
// Turn calls to targets that are defined (i.e., have bodies) into BRSL
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- MVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case MVT::i64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- case MVT::i128:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- case MVT::f32:
- case MVT::f64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
SPUTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
Flag = Chain.getValue(1);
}
return SDValue();
Value = Value >> 32;
}
- if (isS10Constant(Value))
+ if (isInt<10>(Value))
return DAG.getTargetConstant(Value, ValueType);
}
uint64_t SplatBits = APSplatBits.getZExtValue();
switch (VT.getSimpleVT().SimpleTy) {
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
/*NOTREACHED*/
- }
case MVT::v4f32: {
uint32_t Value32 = uint32_t(SplatBits);
assert(SplatBitSize == 32
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
- case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
- }
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
unsigned PrevElt = 0;
- unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+ int rotamt=0;
+ EVT maskVT; // which of the c?d instructions to use
if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
} else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
} else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
} else
llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+ // elements are taken form V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
if (PrevElt > 0 && SrcElt < MaxElts) {
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ rotamt = SrcElt-i;
PrevElt = SrcElt;
- if (SrcElt == 0)
- V0Elt = i;
} else {
rotate = false;
}
- } else if (PrevElt == 0) {
- // First time through, need to keep track of previous element
+ } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
+ // First time or after a "wrap around"
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
if (EltsFromV2 == 1 && monotonic) {
// Compute mask and shuffle
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Initialize temporary register to 0
- SDValue InitTempReg =
- DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
- // Copy register's contents as index in SHUFFLE_MASK:
- SDValue ShufMaskOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
- DAG.getTargetConstant(V2Elt, MVT::i32),
- DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+
+ // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
+ // R1 ($sp) is used here only as it is guaranteed to have last bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2EltOffset, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
} else if (rotate) {
- int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+ if (rotamt < 0)
+ rotamt +=MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
V1, DAG.getConstant(rotamt, MVT::i16));
} else {
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
-
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
- llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
switch (VT.getSimpleVT().SimpleTy) {
default:
- llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
"type");
/*NOTREACHED*/
case MVT::i8: {
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ // use 0 when the lane to insert to is 'undef'
+ int64_t Offset=0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
+ }
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(CN->getSExtValue(), PtrVT));
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, dl, VT,
All conversions to i64 are expanded to a libcall.
*/
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- SPUTargetLowering &TLI) {
+ const SPUTargetLowering &TLI) {
EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
All conversions from i64 are expanded to a libcall.
*/
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- SPUTargetLowering &TLI) {
+ const SPUTargetLowering &TLI) {
EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
case ISD::SETONE:
compareOp = ISD::SETNE; break;
default:
- llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
}
SDValue result =
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
// Create shuffle mask, least significant doubleword of quadword
unsigned maskHigh = 0x08090a0b;
unsigned maskLow = 0x0c0d0e0f;
DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
DAG.getConstant(31, MVT::i32));
+ // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
// Shuffle bytes - Copy the sign bits into the upper 64 bits
// and the input value into the lower 64 bits.
SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
+ extended, sraVal, shufMask);
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
}
lowering of nodes.
*/
SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
unsigned Opc = (unsigned) Op.getOpcode();
EVT VT = Op.getValueType();
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG)
+ SelectionDAG &DAG) const
{
#if 0
unsigned Opc = (unsigned) N->getOpcode();
case SPUISD::IndirectAddr: {
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->getZExtValue() == 0) {
+ if (CN != 0 && CN->isNullValue()) {
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
return TargetLowering::getConstraintType(ConstraintLetter);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;\r
+ //FIXME: Seems like the supported constraint letters were just copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // I'm leaving it so until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
- Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
// The SPU target isn't yet aware of offsets.
return false;
}
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ //ceqi, cgti, etc. all take s10 operand
+ return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type * ) const{
+
+ // A-form: 18bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+ // D-form: reg + 14bit offset
+ if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg+reg
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
+ return true;
+
+ return false;
+}
+