}
};
+/// LSBaseSDNode - Base class for LoadSDNode and StoreSDNode
+///
+class LSBaseSDNode : public SDNode {
+private:
+ //! SrcValue - Memory location for alias analysis.
+ const Value *SrcValue;
+
+ //! SVOffset - Memory location offset.
+ int SVOffset;
+
+ //! Alignment - Alignment of memory location in bytes.
+ unsigned Alignment;
+
+ //! IsVolatile - True if the load or store is volatile.
+ bool IsVolatile;
+protected:
+ //! Operand array for load and store
+ /*!
+ \note Moving this array to the base class captures more
+ common functionality shared between LoadSDNode and
+ StoreSDNode
+ */
+ SDOperand Ops[4];
+public:
+ LSBaseSDNode(ISD::NodeType NodeTy, SDVTList VTs, const Value *SV, int SVO,
+ unsigned Align, bool Vol)
+ : SDNode(NodeTy, VTs),
+ SrcValue(SV), SVOffset(SVO), Alignment(Align), IsVolatile(Vol)
+ { }
+
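+ // Operand layout: loads are (Chain, Ptr, Offset); stores are
+ // (Chain, Value, Ptr, Offset). The accessors below pick the right
+ // operand index based on the node's opcode.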
+ const SDOperand getChain() const {
+ return getOperand(0);
+ }
+ const SDOperand getBasePtr() const {
+ return getOperand(getOpcode() == ISD::LOAD ? 1 : 2);
+ }
+ const SDOperand getOffset() const {
+ return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
+ }
+ const SDOperand getValue() const {
+ assert(getOpcode() == ISD::STORE && "getValue() only valid for stores");
+ return getOperand(1);
+ }
+
+ const Value *getSrcValue() const { return SrcValue; }
+ int getSrcValueOffset() const { return SVOffset; }
+ unsigned getAlignment() const { return Alignment; }
+ bool isVolatile() const { return IsVolatile; }
+
+ static bool classof(const LSBaseSDNode *N) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE;
+ }
+};
+
/// LoadSDNode - This class is used to represent ISD::LOAD nodes.
///
-class LoadSDNode : public SDNode {
+class LoadSDNode : public LSBaseSDNode {
virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
- SDOperand Ops[3];
// AddrMode - unindexed, pre-indexed, post-indexed.
ISD::MemIndexedMode AddrMode;
// LoadedVT - VT of loaded value before extension.
MVT::ValueType LoadedVT;
-
- // SrcValue - Memory location for alias analysis.
- const Value *SrcValue;
-
- // SVOffset - Memory location offset.
- int SVOffset;
-
- // Alignment - Alignment of memory location in bytes.
- unsigned Alignment;
-
- // IsVolatile - True if the load is volatile.
- bool IsVolatile;
protected:
friend class SelectionDAG;
LoadSDNode(SDOperand *ChainPtrOff, SDVTList VTs,
ISD::MemIndexedMode AM, ISD::LoadExtType ETy, MVT::ValueType LVT,
const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
- : SDNode(ISD::LOAD, VTs),
- AddrMode(AM), ExtType(ETy), LoadedVT(LVT), SrcValue(SV), SVOffset(O),
- Alignment(Align), IsVolatile(Vol) {
+ : LSBaseSDNode(ISD::LOAD, VTs, SV, O, Align, Vol),
+ AddrMode(AM), ExtType(ETy), LoadedVT(LVT) {
Ops[0] = ChainPtrOff[0]; // Chain
Ops[1] = ChainPtrOff[1]; // Ptr
Ops[2] = ChainPtrOff[2]; // Off
}
public:
- const SDOperand getChain() const { return getOperand(0); }
- const SDOperand getBasePtr() const { return getOperand(1); }
- const SDOperand getOffset() const { return getOperand(2); }
ISD::MemIndexedMode getAddressingMode() const { return AddrMode; }
ISD::LoadExtType getExtensionType() const { return ExtType; }
MVT::ValueType getLoadedVT() const { return LoadedVT; }
- const Value *getSrcValue() const { return SrcValue; }
- int getSrcValueOffset() const { return SVOffset; }
- unsigned getAlignment() const { return Alignment; }
- bool isVolatile() const { return IsVolatile; }
static bool classof(const LoadSDNode *) { return true; }
+ static bool classof(const LSBaseSDNode *N) { return N->getOpcode() == ISD::LOAD; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD;
}
/// StoreSDNode - This class is used to represent ISD::STORE nodes.
///
-class StoreSDNode : public SDNode {
+class StoreSDNode : public LSBaseSDNode {
virtual void ANCHOR(); // Out-of-line virtual method to give class a home.
- SDOperand Ops[4];
// AddrMode - unindexed, pre-indexed, post-indexed.
ISD::MemIndexedMode AddrMode;
// StoredVT - VT of the value after truncation.
MVT::ValueType StoredVT;
-
- // SrcValue - Memory location for alias analysis.
- const Value *SrcValue;
-
- // SVOffset - Memory location offset.
- int SVOffset;
-
- // Alignment - Alignment of memory location in bytes.
- unsigned Alignment;
-
- // IsVolatile - True if the store is volatile.
- bool IsVolatile;
protected:
friend class SelectionDAG;
StoreSDNode(SDOperand *ChainValuePtrOff, SDVTList VTs,
ISD::MemIndexedMode AM, bool isTrunc, MVT::ValueType SVT,
const Value *SV, int O=0, unsigned Align=0, bool Vol=false)
- : SDNode(ISD::STORE, VTs),
- AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT), SrcValue(SV),
- SVOffset(O), Alignment(Align), IsVolatile(Vol) {
+ : LSBaseSDNode(ISD::STORE, VTs, SV, O, Align, Vol),
+ AddrMode(AM), IsTruncStore(isTrunc), StoredVT(SVT) {
Ops[0] = ChainValuePtrOff[0]; // Chain
Ops[1] = ChainValuePtrOff[1]; // Value
Ops[2] = ChainValuePtrOff[2]; // Ptr
}
public:
- const SDOperand getChain() const { return getOperand(0); }
- const SDOperand getValue() const { return getOperand(1); }
- const SDOperand getBasePtr() const { return getOperand(2); }
- const SDOperand getOffset() const { return getOperand(3); }
ISD::MemIndexedMode getAddressingMode() const { return AddrMode; }
bool isTruncatingStore() const { return IsTruncStore; }
MVT::ValueType getStoredVT() const { return StoredVT; }
- const Value *getSrcValue() const { return SrcValue; }
- int getSrcValueOffset() const { return SVOffset; }
- unsigned getAlignment() const { return Alignment; }
- bool isVolatile() const { return IsVolatile; }
static bool classof(const StoreSDNode *) { return true; }
+ static bool classof(const LSBaseSDNode *N) { return N->getOpcode() == ISD::STORE; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::STORE;
}
//===----------------------------------------------------------------------===//
// 7-bit integer type, used as an immediate:
-def cell_i7_ty: LLVMType<i16>; // Note: This was i8
-def cell_i8_ty: LLVMType<i16>; // Note: This was i8
+def cell_i7_ty: LLVMType<i8>;
+def cell_i8_ty: LLVMType<i8>;
class v16i8_u7imm<string builtin_suffix> :
GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
class v16i8_u8imm<string builtin_suffix> :
GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
+ Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
class v16i8_s10imm<string builtin_suffix> :
def CellSDKand:
RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "add\t $rT, $rA, $rB", IntegerOp,
+ "and\t $rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT),
(int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKandc:
RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "addc\t $rT, $rA, $rB", IntegerOp,
+ "andc\t $rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT),
(int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKandbi:
- RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"andbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT),
(int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"orbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT),
(int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def CellSDKxorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"xorbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
def CellSDKceqbi:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"ceqbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
def CellSDKcgtbi:
- RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"cgtbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
(int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
def CellSDKclgtbi:
- RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
"clgtbi\t $rT, $rA, $val", BranchResolv,
[(set (v16i8 VECREG:$rT),
(int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
SPUDAGToDAGISel::SelectAFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Index) {
// These match the addr256k operand type:
- MVT::ValueType PtrVT = SPUtli.getPointerTy();
MVT::ValueType OffsVT = MVT::i16;
+ MVT::ValueType PtrVT = SPUtli.getPointerTy();
switch (N.getOpcode()) {
case ISD::Constant:
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n";
+ abort();
+ /*NOTREACHED*/
+
case ISD::TargetConstant: {
// Loading from a constant address.
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
return true;
}
}
- case ISD::ConstantPool:
- case ISD::TargetConstantPool: {
- // The constant pool address is N. Base is a dummy that will be ignored by
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetConstantPool:
+ case SPUISD::AFormAddr: {
+ // The address is N (copied into Base); Index is a dummy zero ignored by
// the assembly printer.
Base = N;
Index = CurDAG->getTargetConstant(0, OffsVT);
return true;
}
-
- case ISD::GlobalAddress:
- case ISD::TargetGlobalAddress: {
- // The global address is N. Base is a dummy that is ignored by the
- // assembly printer.
- Base = N;
- Index = CurDAG->getTargetConstant(0, OffsVT);
- return true;
- }
}
return false;
Index = CurDAG->getTargetConstant(0, PtrTy);
return true;
} else if (Opc == ISD::FrameIndex) {
- // Stack frame index must be less than 512 (divided by 16):
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
- << FI->getIndex() << "\n");
+ << FI->getIndex() << "\n");
if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
Base = CurDAG->getTargetConstant(0, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
// Generated by getelementptr
const SDOperand Op0 = N.getOperand(0); // Frame index/base
const SDOperand Op1 = N.getOperand(1); // Offset within base
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
- // Not a constant?
- if (CN == 0)
- return false;
-
- int32_t offset = (int32_t) CN->getSignExtended();
- unsigned Opc0 = Op0.getOpcode();
-
- if ((offset & 0xf) != 0) {
- cerr << "SelectDFormAddr: unaligned offset = " << offset << "\n";
- abort();
- /*NOTREACHED*/
- }
+ if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+ assert(CN != 0 && "SelectDFormAddr: Expected a constant");
- if (Opc0 == ISD::FrameIndex) {
- FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
- DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI->getIndex() << "\n");
+ int32_t offset = (int32_t) CN->getSignExtended();
+ unsigned Opc0 = Op0.getOpcode();
- if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
- return true;
+ if ((offset & 0xf) != 0) {
+ // Unaligned offset: punt and let X-form address handle it.
+ // NOTE: This really doesn't have to be strictly 16-byte aligned,
+ // since the load/store quadword instructions will implicitly
+ // zero the lower 4 bits of the resulting address.
+ return false;
}
- } else if (offset > SPUFrameInfo::minFrameOffset()
- && offset < SPUFrameInfo::maxFrameOffset()) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- if (Opc0 == ISD::GlobalAddress) {
- // Convert global address to target global address
- GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
- Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
- return true;
- } else {
- // Otherwise, just take operand 0
- Index = Op0;
- return true;
+
+ if (Opc0 == ISD::FrameIndex) {
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0);
+ DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI->getIndex() << "\n");
+
+ if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
+ return true;
+ }
+ } else if (offset > SPUFrameInfo::minFrameOffset()
+ && offset < SPUFrameInfo::maxFrameOffset()) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ if (Opc0 == ISD::GlobalAddress) {
+ // Convert global address to target global address
+ GlobalAddressSDNode *GV = dyn_cast<GlobalAddressSDNode>(Op0);
+ Index = CurDAG->getTargetGlobalAddress(GV->getGlobal(), PtrTy);
+ return true;
+ } else {
+ // Otherwise, just take operand 0
+ Index = Op0;
+ return true;
+ }
}
- }
+ } else
+ return false;
} else if (Opc == SPUISD::DFormAddr) {
// D-Form address: This is pretty straightforward, naturally...
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy);
Index = N.getOperand(0);
return true;
+ } else if (Opc == ISD::FrameIndex) {
+ // Stack frame index must be less than 512 (divided by 16):
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
+ DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI->getIndex() << "\n");
+ if (FI->getIndex() < SPUFrameInfo::maxFrameOffset()) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy);
+ return true;
+ }
}
return false;
unsigned N2Opc = N2.getOpcode();
if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo)
- || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)) {
+ || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi)
+ || (N1Opc == SPUISD::XFormAddr)) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
abort();
/*UNREACHED*/
}
+ } else if (Opc == SPUISD::XFormAddr) {
+ Base = N;
+ Index = N.getOperand(1);
+ return true;
} else if (N.getNumOperands() == 2) {
SDOperand N1 = N.getOperand(0);
SDOperand N2 = N.getOperand(1);
} else if (Opc == ISD::FrameIndex) {
// Selects to AIr32 FI, 0 which in turn will become AIr32 SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDOperand TFI = CurDAG->getTargetFrameIndex(FI, SPUtli.getPointerTy());
+ MVT::ValueType PtrVT = SPUtli.getPointerTy();
+ SDOperand Zero = CurDAG->getTargetConstant(0, PtrVT);
+ SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT);
DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 <FI>, 0\n");
- return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero);
+ CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero);
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
unsigned VT = N->getValueType(0);
/*!
\arg Op Operand to test
\return true if the operand is a memory target (i.e., global
- address, external symbol, constant pool) or an existing D-Form
+ address, external symbol, constant pool) or an A-form
address.
*/
bool isMemoryOperand(const SDOperand &Op)
const unsigned Opc = Op.getOpcode();
return (Opc == ISD::GlobalAddress
|| Opc == ISD::GlobalTLSAddress
- || Opc == ISD::FrameIndex
+ /* || Opc == ISD::FrameIndex */
|| Opc == ISD::JumpTable
|| Opc == ISD::ConstantPool
|| Opc == ISD::ExternalSymbol
|| Opc == ISD::TargetGlobalAddress
|| Opc == ISD::TargetGlobalTLSAddress
- || Opc == ISD::TargetFrameIndex
+ /* || Opc == ISD::TargetFrameIndex */
|| Opc == ISD::TargetJumpTable
|| Opc == ISD::TargetConstantPool
|| Opc == ISD::TargetExternalSymbol
- || Opc == SPUISD::DFormAddr);
+ || Opc == SPUISD::AFormAddr);
}
}
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
+
setSetCCResultType(MVT::i32);
setShiftAmountType(MVT::i32);
setSetCCResultContents(ZeroOrOneSetCCResult);
node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
// LowerOperation implementation
//===----------------------------------------------------------------------===//
+/// Aligned load common code for CellSPU
+/*!
+ \param[in] Op The SelectionDAG load or store operand
+ \param[in] DAG The selection DAG
+ \param[in] ST CellSPU subtarget information structure
+ \param[in] LSN The load or store node (LSBaseSDNode)
+ \param[in,out] alignment Caller initializes this to the load or store node's
+ value from getAlignment(); may be updated while generating the aligned load
+ \param[out] alignOffs Set by AlignedLoad to the constant offset of the
+ access; the 16-byte aligned portion is (alignOffs & ~0xf)
+ \param[out] prefSlotOffs Set by AlignedLoad to the offset within the 16-byte
+ chunk, adjusted for the value type's preferred slot
+ \param[in,out] VT Caller initializes this value type to the load or store
+ node's loaded or stored value type; may be updated for an i1-extending load
+ or store.
+ \param[out] was16aligned true if the base pointer had 16-byte alignment,
+ otherwise false. Can help to determine if the chunk needs to be rotated.
+
+ Both load and store lowering load a block of data aligned on a 16-byte
+ boundary. This is the common aligned-load code shared between the two.
+ */
+static SDOperand
+AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
+ LSBaseSDNode *LSN,
+ unsigned &alignment, int &alignOffs, int &prefSlotOffs,
+ unsigned &VT, bool &was16aligned)
+{
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+ SDOperand basePtr = LSN->getBasePtr();
+ SDOperand chain = LSN->getChain();
+
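+ // If the base pointer is an ADD of a pointer and a constant (typically
+ // produced by getelementptr lowering), record the constant as alignOffs
+ // and derive the preferred-slot rotation from its low four bits.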
+ if (basePtr.getOpcode() == ISD::ADD) {
+ SDOperand Op1 = basePtr.Val->getOperand(1);
+
+ if (Op1.getOpcode() == ISD::Constant
+     || Op1.getOpcode() == ISD::TargetConstant) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+
+ alignOffs = (int) CN->getValue();
+ prefSlotOffs = (int) (alignOffs & 0xf);
+
+ // Adjust the rotation amount to ensure that the final result ends up in
+ // the preferred slot:
+ prefSlotOffs -= vtm->prefslot_byte;
+ basePtr = basePtr.getOperand(0);
+
+ // Modify alignment, since the ADD is likely from getElementPtr:
+ switch (basePtr.getOpcode()) {
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
+ const GlobalValue *GV = GN->getGlobal();
+ alignment = GV->getAlignment();
+ break;
+ }
+ }
+ } else {
+ alignOffs = 0;
+ prefSlotOffs = -vtm->prefslot_byte;
+ }
+ } else {
+ alignOffs = 0;
+ prefSlotOffs = -vtm->prefslot_byte;
+ }
+
+ if (alignment == 16) {
+ // Realign the base pointer as a D-Form address:
+ if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
+ if (isMemoryOperand(basePtr)) {
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+ unsigned Opc = (!ST->usingLargeMem()
+ ? SPUISD::AFormAddr
+ : SPUISD::XFormAddr);
+ basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
+ }
+ basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+ basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
+ }
+
+ // Emit the vector load:
+ was16aligned = true;
+ return DAG.getLoad(MVT::v16i8, chain, basePtr,
+ LSN->getSrcValue(), LSN->getSrcValueOffset(),
+ LSN->isVolatile(), 16);
+ }
+
+ // Unaligned load or we're using the "large memory" model, which means that
+ // we have to be very pessimistic:
+ if (isMemoryOperand(basePtr)) {
+ basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
+ }
+
+ // Add the offset
+ basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
+ was16aligned = false;
+ return DAG.getLoad(MVT::v16i8, chain, basePtr,
+ LSN->getSrcValue(), LSN->getSrcValueOffset(),
+ LSN->isVolatile(), 16);
+}
+
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDOperand basep = LN->getBasePtr();
SDOperand the_chain = LN->getChain();
- MVT::ValueType BasepOpc = basep.Val->getOpcode();
MVT::ValueType VT = LN->getLoadedVT();
MVT::ValueType OpVT = Op.Val->getValueType(0);
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
SDOperand Ops[8];
- if (BasepOpc == ISD::FrameIndex) {
- // Loading from a frame index is always properly aligned. Always.
- return SDOperand();
- }
-
// For an extending load of an i1 variable, just call it i8 (or whatever we
// were passed) and make it zero-extended:
if (VT == MVT::i1) {
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
- SDOperand result;
- SDOperand rot_op, rotamt;
- SDOperand ptrp;
- int c_offset;
- int c_rotamt;
-
- // The vector type we really want to be when we load the 16-byte chunk
- MVT::ValueType vecVT, opVecVT;
-
- vecVT = MVT::v16i8;
- if (VT != MVT::i1)
- vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
- opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
+ int offset, rotamt;
+ bool was16aligned;
+ SDOperand result =
+ AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
- if (basep.getOpcode() == ISD::ADD) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
+ if (result.Val == 0)
+ return result;
- assert(CN != NULL
- && "LowerLOAD: ISD::ADD operand 1 is not constant");
+ the_chain = result.getValue(1);
+ // Rotate the chunk if necessary
+ if (rotamt < 0)
+ rotamt += 16;
+ if (rotamt != 0) {
+ SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
+
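+ // For a 16-byte aligned chunk the rotate amount is a known constant;
+ // otherwise use the base pointer of the generated load as the register
+ // rotate amount.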
+ if (was16aligned) {
+ Ops[0] = the_chain;
+ Ops[1] = result;
+ Ops[2] = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ LoadSDNode *LN1 = cast<LoadSDNode>(result);
+ Ops[0] = the_chain;
+ Ops[1] = result;
+ Ops[2] = LN1->getBasePtr();
+ }
- c_offset = (int) CN->getValue();
- c_rotamt = (int) (c_offset & 0xf);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
+ the_chain = result.getValue(1);
+ }
- // Adjust the rotation amount to ensure that the final result ends up in
- // the preferred slot:
- c_rotamt -= vtm->prefslot_byte;
- ptrp = basep.getOperand(0);
+ if (VT == OpVT || ExtType == ISD::EXTLOAD) {
+ SDVTList scalarvts;
+ MVT::ValueType vecVT = MVT::v16i8;
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ if (OpVT == VT) {
+ if (VT != MVT::i1)
+ vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
+ } else
+ vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
+
+ Ops[0] = the_chain;
+ Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
+ scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
+ result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
+ the_chain = result.getValue(1);
} else {
- c_offset = 0;
- c_rotamt = -vtm->prefslot_byte;
- ptrp = basep;
- }
+ // Handle the sign and zero-extending loads for i1 and i8:
+ unsigned NewOpC;
- if (alignment == 16) {
- // 16-byte aligned load into preferred slot, no rotation
- if (c_rotamt == 0) {
- if (isMemoryOperand(ptrp))
- // Return unchanged
- return SDOperand();
- else {
- // Return modified D-Form address for pointer:
- ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
- ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
- if (VT == OpVT)
- return DAG.getLoad(VT, LN->getChain(), ptrp,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
- else
- return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
- LN->getSrcValueOffset(), OpVT,
- LN->isVolatile(), 16);
- }
+ if (ExtType == ISD::SEXTLOAD) {
+ NewOpC = (OpVT == MVT::i1
+ ? SPUISD::EXTRACT_I1_SEXT
+ : SPUISD::EXTRACT_I8_SEXT);
} else {
- // Need to rotate...
- if (c_rotamt < 0)
- c_rotamt += 16;
- // Realign the base pointer, with a D-Form address
- if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
- ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
- else
- basep = ptrp;
-
- // Rotate the load:
- rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
- the_chain = rot_op.getValue(1);
- rotamt = DAG.getConstant(c_rotamt, MVT::i16);
-
- SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
- Ops[0] = the_chain;
- Ops[1] = rot_op;
- Ops[2] = rotamt;
-
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
- the_chain = result.getValue(1);
-
- if (VT == OpVT || ExtType == ISD::EXTLOAD) {
- SDVTList scalarvts;
- Ops[0] = the_chain;
- Ops[1] = result;
- if (OpVT == VT) {
- scalarvts = DAG.getVTList(VT, MVT::Other);
- } else {
- scalarvts = DAG.getVTList(OpVT, MVT::Other);
- }
-
- result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
- result);
- Ops[0] = the_chain;
- Ops[1] = result;
- result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
- the_chain = result.getValue(1);
- } else {
- // Handle the sign and zero-extending loads for i1 and i8:
- unsigned NewOpC;
-
- if (ExtType == ISD::SEXTLOAD) {
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_SEXT
- : SPUISD::EXTRACT_I8_SEXT);
- } else {
- assert(ExtType == ISD::ZEXTLOAD);
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_ZEXT
- : SPUISD::EXTRACT_I8_ZEXT);
- }
-
- result = DAG.getNode(NewOpC, OpVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
- SDOperand retops[2] = { result, the_chain };
-
- result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
- return result;
- /*UNREACHED*/
- }
- } else {
- // Misaligned 16-byte load:
- if (basep.getOpcode() == ISD::LOAD) {
- LN = cast<LoadSDNode>(basep);
- if (LN->getAlignment() == 16) {
- // We can verify that we're really loading from a 16-byte aligned
- // chunk. Encapsulate basep as a D-Form address and return a new
- // load:
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
- DAG.getConstant(0, PtrVT));
- if (OpVT == VT)
- return DAG.getLoad(VT, LN->getChain(), basep,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
- else
- return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- OpVT, LN->isVolatile(), 16);
- }
+ assert(ExtType == ISD::ZEXTLOAD);
+ NewOpC = (OpVT == MVT::i1
+ ? SPUISD::EXTRACT_I1_ZEXT
+ : SPUISD::EXTRACT_I8_ZEXT);
}
- // Catch all other cases where we can't guarantee that we have a
- // 16-byte aligned entity, which means resorting to an X-form
- // address scheme:
-
- SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
- SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
- SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
-
- ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
-
- SDOperand alignLoad =
- DAG.getLoad(opVecVT, LN->getChain(), ptrp,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
-
- SDOperand insertEltOp =
- DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
-
- result = DAG.getNode(SPUISD::SHUFB, opVecVT,
- alignLoad,
- alignLoad,
- DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
-
- result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
+ result = DAG.getNode(NewOpC, OpVT, result);
+ }
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
- SDOperand retops[2] = { result, the_chain };
+ SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
+ SDOperand retops[2] = { result, the_chain };
- result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
- return result;
- }
- break;
+ result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
+ return result;
}
case ISD::PRE_INC:
case ISD::PRE_DEC:
MVT::ValueType VT = Value.getValueType();
MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDOperand the_chain = SN->getChain();
- //unsigned alignment = SN->getAlignment();
- //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+ unsigned alignment = SN->getAlignment();
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
- SDOperand basep = SN->getBasePtr();
- SDOperand ptrOp;
- int offset;
-
- if (basep.getOpcode() == ISD::FrameIndex) {
- // FrameIndex nodes are always properly aligned. Really.
- return SDOperand();
- }
-
- if (basep.getOpcode() == ISD::ADD) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
- assert(CN != NULL
- && "LowerSTORE: ISD::ADD operand 1 is not constant");
- offset = unsigned(CN->getValue());
- ptrOp = basep.getOperand(0);
- DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
- << offset
- << "\n");
- } else {
- ptrOp = basep;
- offset = 0;
- }
+ int chunk_offset, slot_offset;
+ bool was16aligned;
// The vector type we really want to load from the 16-byte chunk, except
// in the case of MVT::i1, which has to be v16i8.
- unsigned vecVT, stVecVT;
-
+ unsigned vecVT, stVecVT = MVT::v16i8;
+
if (StVT != MVT::i1)
stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
- else
- stVecVT = MVT::v16i8;
vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
- // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
- // the actual dform addr offs($reg).
- basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
- DAG.getConstant((offset & ~0xf), PtrVT));
+ SDOperand alignLoadVec =
+ AlignedLoad(Op, DAG, ST, SN, alignment,
+ chunk_offset, slot_offset, VT, was16aligned);
- // Create the 16-byte aligned vector load
- SDOperand alignLoad =
- DAG.getLoad(vecVT, the_chain, basep,
- SN->getSrcValue(), SN->getSrcValueOffset(),
- SN->isVolatile(), 16);
- the_chain = alignLoad.getValue(1);
+ if (alignLoadVec.Val == 0)
+ return alignLoadVec;
- LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
+ LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ SDOperand basePtr = LN->getBasePtr();
+ SDOperand the_chain = alignLoadVec.getValue(1);
SDOperand theValue = SN->getValue();
SDOperand result;
theValue = theValue.getOperand(0);
}
- SDOperand insertEltOp =
- DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
- DAG.getNode(SPUISD::DFormAddr, PtrVT,
- ptrOp,
- DAG.getConstant((offset & 0xf), PtrVT)));
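+ // Reduce the offset to its byte position within the 16-byte chunk, then
+ // scale from bytes to stored-element units for the insert mask offset.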
+ chunk_offset &= 0xf;
+ chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8);
+
+ SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
+ SDOperand insertEltPtr;
+ SDOperand insertEltOp;
+
+ // If the base pointer is already a D-form address, then just create
+ // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+ if (basePtr.getOpcode() == SPUISD::DFormAddr) {
+ insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+ basePtr.getOperand(0),
+ insertEltOffs);
+ } else {
+ insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ insertEltOffs);
+ }
+ insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
result = DAG.getNode(SPUISD::SHUFB, vecVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
- alignLoad,
+ alignLoadVec,
DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
- result = DAG.getStore(the_chain, result, basep,
+ result = DAG.getStore(the_chain, result, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
LN->isVolatile(), LN->getAlignment());
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- const TargetMachine &TM = DAG.getTarget();
SDOperand Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
// Just return the SDOperand with the constant pool address in it.
return CPI;
} else {
+#if 1
// Generate hi/lo address pair
SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
+#else
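+ // Alternative (disabled): emit a single SPUISD::XFormAddr node and let
+ // the SPUxform patterns expand it into the ILHU/IOHL pair.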
+ return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
+#endif
}
}
const TargetMachine &TM = DAG.getTarget();
if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Just return the SDOperand with the jump table address in it.
- return JTI;
- } else {
- // Generate hi/lo address pair
- SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
- SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
-
- return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
- }
+ return (!ST->usingLargeMem()
+ ? JTI
+ : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
}
assert(0 &&
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
- SDOperand Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Generate a local store address
- return GA;
- } else {
- // Generate hi/lo address pair
- SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
- SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
-
- return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
- }
+ return (!ST->usingLargeMem()
+ ? GA
+ : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
} else {
cerr << "LowerGlobalAddress: Relocation model other than static not "
<< "supported.\n";
static
SDOperand
-LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
SDOperand Chain = Op.getOperand(0);
#if 0
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
unsigned CalleeVT = Callee.getValueType();
+ SDOperand Zero = DAG.getConstant(0, PtrVT);
+ SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
- // Turn calls to targets that are defined (i.e., have bodies) into BRSL
- // style calls, otherwise, external symbols are BRASL calls.
- // NOTE:
- // This may be an unsafe assumption for JIT and really large compilation
- // units.
- if (GV->isDeclaration()) {
- Callee = DAG.getGlobalAddress(GV, CalleeVT);
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+ // style calls; otherwise, external symbols are BRASL calls. This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
+ }
} else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
- DAG.getTargetGlobalAddress(GV, CalleeVT),
- DAG.getConstant(0, PtrVT));
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isLSAAddress(Callee, DAG))
+ else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
// If this is an absolute destination address that appears to be a legal
// local store address, use the munged value.
Callee = SDOperand(Dest, 0);
+ }
Ops.push_back(Chain);
Ops.push_back(Callee);
case ISD::FORMAL_ARGUMENTS:
return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
case ISD::CALL:
- return LowerCALL(Op, DAG);
+ return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::RET:
return LowerRET(Op, DAG, getTargetMachine());
Hi, ///< High address component (upper 16)
Lo, ///< Low address component (lower 16)
PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
DFormAddr, ///< D-Form address "imm($r)"
- XFormAddr, ///< X-Form address "$r1($r2)"
+ XFormAddr, ///< X-Form address "$r($r)"
LDRESULT, ///< Load result (value, chain)
CALL, ///< CALL instruction
def LQAr32:
RI16Form<0b100001100, (outs R32C:$rT), (ins addr256k:$src),
"lqa\t$rT, $src", LoadStore,
- [(set R32C:$rT, (load aform_addr:$src))]>;
+ [(set R32C:$rT, (load aform_addr:$src))]>;
def LQAf32:
RI16Form<0b100001100, (outs R32FP:$rT), (ins addr256k:$src),
RegConstraint<"$rS = $rT">,
NoEncode<"$rS">;
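+// IOHL ors a 16-bit immediate into the low halfword of $rT (the upper half
+// having typically been set by ILHU); the SPUxform patterns use the pair to
+// materialize a full 32-bit address.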
+def IOHLlo:
+ RI16Form<0b100000110, (outs R32C:$rT), (ins R32C:$rS, symbolLo:$val),
+ "iohl\t$rT, $val", ImmLoad,
+ [/* no pattern */]>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
// Form select mask for bytes using immediate, used in conjunction with the
// SELB instruction:
// are used here for type checking (instances where ROTQBI is used actually
// use vector registers)
def ROTQBYvec:
- RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ RRForm<0b00111011100, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
"rotqby\t$rT, $rA, $rB", RotateShift,
- [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R16C:$rB))]>;
+ [(set (v16i8 VECREG:$rT), (SPUrotbytes_left (v16i8 VECREG:$rA), R32C:$rB))]>;
-def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R16C:$rB),
- (ROTQBYvec VECREG:$rA, R16C:$rB)>;
+def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB),
+ (ROTQBYvec VECREG:$rA, R32C:$rB)>;
// See ROTQBY note above.
def ROTQBYIvec:
[/* no pattern to match: intrinsic */]>;
def CEQBIr8:
- RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val),
+ RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm_i8:$val),
"ceqbi\t$rT, $rA, $val", ByteOp,
[/* no pattern to match: intrinsic */]>;
def CEQBIv16i8:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val),
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm_i8:$val),
"ceqbi\t$rT, $rA, $val", ByteOp,
[/* no pattern to match: intrinsic */]>;
def BRASL:
BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
"brasl\t$$lr, $func",
- [(SPUcall tglobaladdr:$func)]>;
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
// Branch indirect and set link if external data. These instructions are not
// actually generated, matched by an intrinsic:
// low parts in order to load them into a register.
//===----------------------------------------------------------------------===//
-def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
-def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
-def : Pat<(SPUdform tglobaladdr:$in, imm:$imm), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
-def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
-def : Pat<(SPUdform tconstpool:$in, imm:$imm), (ILAlsa tconstpool:$in)>;
-def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
-def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
-def : Pat<(SPUdform tjumptable:$in, imm:$imm), (ILAlsa tjumptable:$in)>;
-
-// Force load of global address to a register. These forms show up in
-// SPUISD::DFormAddr pseudo instructions:
-def : Pat<(add tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(add tconstpool:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(add tjumptable:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>;
+def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>;
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUxform tglobaladdr:$in, 0),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>;
+def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUxform tjumptable:$in, 0),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>;
+def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+/* def : Pat<(SPUxform tconstpool:$in, 0),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */
+
// Instrinsics:
include "CellSDKIntrinsics.td"
// PC-relative address
def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
// D-Form "imm($reg)" addresses
def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>;
+// X-Form "$reg($reg)" addresses
+def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>;
+
// SPU 32-bit sign-extension to 64-bits
def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>;
return ((Value & ((1 << 19) - 1)) == Value);
}]>;
+def lo16 : PatLeaf<(imm), [{
+ // lo16 predicate - returns true if the immediate fits in the low order
+ // 16 bits of a 32-bit constant (all high order bits are zero):
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
def hi16 : PatLeaf<(imm), [{
// hi16 predicate - returns true if the immediate has all zeros in the
// low order bits and is a 32-bit constant:
//===----------------------------------------------------------------------===//
// Operand Definitions.
-def s7imm: Operand<i16> {
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
let PrintMethod = "printS7ImmOperand";
}
; RUN: grep andi %t1.s | count 36
; RUN: grep andhi %t1.s | count 30
; RUN: grep andbi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; AND instruction generation:
define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep bisl %t1.s | count 6 &&
+; RUN: grep ila %t1.s | count 1 &&
+; RUN: grep rotqbyi %t1.s | count 4 &&
+; RUN: grep lqa %t1.s | count 4 &&
+; RUN: grep lqd %t1.s | count 6 &&
+; RUN: grep dispatch_tab %t1.s | count 10
+; ModuleID = 'call_indirect.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
+
+define void @dispatcher(i32 %i_arg, float %f_arg) {
+entry:
+ %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
+ tail call void %tmp2( i32 %i_arg, float %f_arg )
+ %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
+ tail call void %tmp2.1( i32 %i_arg, float %f_arg )
+ %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
+ tail call void %tmp2.2( i32 %i_arg, float %f_arg )
+ %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
+ tail call void %tmp2.3( i32 %i_arg, float %f_arg )
+ %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
+ tail call void %tmp2.4( i32 %i_arg, float %f_arg )
+ %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
+ tail call void %tmp2.5( i32 %i_arg, float %f_arg )
+ ret void
+}
; RUN: grep andi %t1.s | count 3 &&
; RUN: grep rotmi %t1.s | count 2 &&
; RUN: grep rothmi %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare i32 @llvm.ctpop.i8(i8)
declare i32 @llvm.ctpop.i16(i16)
; RUN: grep dfnms %t1.s | count 4
;
; This file includes double precision floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fadd(double %arg1, double %arg2) {
%A = add double %arg1, %arg2
; Alternatively, a ^ ~b, which the compiler will also match.
; ModuleID = 'eqv.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
; RUN: grep lqx %t2.s | count 27 &&
; RUN: grep space %t1.s | count 8 &&
; RUN: grep byte %t1.s | count 424
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @i32_extract_0(<4 x i32> %v) {
entry:
; RUN: grep fcmeq %t1.s | count 1
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare double @fabs(double)
declare float @fabsf(float)
; RUN: grep fnms %t1.s | count 2
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fdiv32(float %arg1, float %arg2) {
%A = fdiv float %arg1, %arg2
; RUN: grep xor %t1.s | count 4 &&
; RUN: grep and %t1.s | count 5 &&
; RUN: grep andbi %t1.s | count 3
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fneg_dp(double %X) {
%Y = sub double -0.000000e+00, %X
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep "ilh" %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i16 @test_1() {
%x = alloca i16, align 16
; RUN: grep 49077 %t1.s | count 1 &&
; RUN: grep 1267 %t1.s | count 2 &&
; RUN: grep 16309 %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @test_1() {
ret i32 4784128 ;; ILHU via pattern (0x49000)
; RUN: grep 128 %t1.s | count 30 &&
; RUN: grep 224 %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
; RUN: grep andi %t1.s | count 1 &&
; RUN: grep ila %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
define float @sitofp_i32(i32 %arg1) {
%A = sitofp i32 %arg1 to float ; <float> [#uses=1]
ret float %A
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 30 &&
+; RUN: grep ceqb %t1.s | count 10 &&
+; RUN: grep ceqhi %t1.s | count 5 &&
+; RUN: grep ceqi %t1.s | count 5 &&
+; RUN: grep cgt %t1.s | count 30 &&
+; RUN: grep cgtb %t1.s | count 10 &&
+; RUN: grep cgthi %t1.s | count 5 &&
+; RUN: grep cgti %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
+
+
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @ceqitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep fa %t1.s | count 5 &&
+; RUN: grep fs %t1.s | count 5 &&
+; RUN: grep fm %t1.s | count 15 &&
+; RUN: grep fceq %t1.s | count 5 &&
+; RUN: grep fcmeq %t1.s | count 5 &&
+; RUN: grep fcgt %t1.s | count 5 &&
+; RUN: grep fcmgt %t1.s | count 5 &&
+; RUN: grep fma %t1.s | count 5 &&
+; RUN: grep fnms %t1.s | count 5 &&
+; RUN: grep fms %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ %Y = call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ ret <4 x float> %Y
+}
\ No newline at end of file
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 20 &&
+; RUN: grep andc %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
+ %Y = call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
+ %Y = call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @anditest(<4 x i32> %A) {
+ %Y = call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @andhitest(<8 x i16> %A) {
+ %Y = call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
+ ret <8 x i16> %Y
+}
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
; RUN: grep xshw %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
; RUN: grep ori %t1.s | count 30
; RUN: grep orhi %t1.s | count 30
; RUN: grep orbi %t1.s | count 15
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; OR instruction generation:
define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
; RUN: grep rothi.*,.3 %t1.s | count 1
; RUN: grep andhi %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector rotates are not currently supported in gcc or llvm assembly. These are
; not tested.
; RUN: grep and %t1.s | count 2
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
%A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
; RUN: grep shli %t1.s | count 51
; RUN: grep xshw %t1.s | count 5
; RUN: grep and %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector shifts are not currently supported in gcc or llvm assembly. These are
; not tested.
;
; This file includes standard floating point arithmetic instructions
; NOTE fdiv is tested separately since it is a compound operation
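+; (SPU has no single-precision divide instruction; fdiv is presumably expanded
+; into a reciprocal-estimate/refinement sequence (frest/fi plus fnms/fma), so
+; its instruction counts are checked in a separate test rather than by the
+; simple greps used here.)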
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fp_add(float %arg1, float %arg2) {
%A = add float %arg1, %arg2 ; <float> [#uses=1]
--- /dev/null
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep lqa %t1.s | count 10 &&
+; RUN: grep lqd %t1.s | count 2 &&
+; RUN: grep rotqbyi %t1.s | count 5 &&
+; RUN: grep xshw %t1.s | count 1 &&
+; RUN: grep andi %t1.s | count 4 &&
+; RUN: grep cbd %t1.s | count 3 &&
+; RUN: grep chd %t1.s | count 1 &&
+; RUN: grep cwd %t1.s | count 1 &&
+; RUN: grep shufb %t1.s | count 5 &&
+; RUN: grep stqa %t1.s | count 5
+; ModuleID = 'struct_1.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; struct hackstate {
+; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
+; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
+; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
+; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
+; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
+; int i2; // offset 12 [ignored]
+; unsigned char c4; // offset 16 [ignored]
+; unsigned char c5; // offset 17 [ignored]
+; unsigned char c6; // offset 18 [ignored]
+; unsigned char c7; // offset 19 (no rotate, in preferred slot)
+; int i3; // offset 20 [ignored]
+; int i4; // offset 24 [ignored]
+; int i5; // offset 28 [ignored]
+; int i6; // offset 32 (no rotate, in preferred slot)
+; }
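+;
+; The rotate amounts noted above presumably follow the SPU preferred-slot
+; rule (assuming byte 3 for an i8, byte 2 for an i16, and byte 0 for an i32
+; within each 16-byte quadword):
+;     rotate-left bytes = (offset mod 16 - preferred-slot byte) mod 16
+; e.g. c2 at offset 1: (1 - 3) mod 16 = 14;  i1 at offset 4: (4 - 0) mod 16 = 4.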
+%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
+
+; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+@state = global %struct.hackstate zeroinitializer, align 16
+
+define i8 @get_hackstate_c1() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c2() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c3() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i1() {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret i32 %tmp2
+}
+
+define i16 @get_hackstate_s1() signext {
+entry:
+ %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret i16 %tmp2
+}
+
+define i8 @get_hackstate_c7() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i6() zeroext {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
+ ret i32 %tmp2
+}
+
+define void @set_hackstate_c1(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret void
+}
+
+define void @set_hackstate_c2(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret void
+}
+
+define void @set_hackstate_c3(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret void
+}
+
+define void @set_hackstate_i1(i32 %i) {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret void
+}
+
+define void @set_hackstate_s1(i16 signext %s) {
+entry:
+ store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret void
+}