-//
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
//
//
//===----------------------------------------------------------------------===//
-#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
+#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
namespace {
std::map<unsigned, const char *> node_names;
- //! EVT mapping to useful data for Cell SPU
- struct valtype_map_s {
- const EVT valtype;
- const int prefslot_byte;
- };
-
- const valtype_map_s valtype_map[] = {
- { EVT::i1, 3 },
- { EVT::i8, 3 },
- { EVT::i16, 2 },
- { EVT::i32, 0 },
- { EVT::f32, 0 },
- { EVT::i64, 0 },
- { EVT::f64, 0 },
- { EVT::i128, 0 }
- };
-
- const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
- const valtype_map_s *getValueTypeMapEntry(EVT VT) {
- const valtype_map_s *retval = 0;
-
- for (size_t i = 0; i < n_valtype_map; ++i) {
- if (valtype_map[i].valtype == VT) {
- retval = valtype_map + i;
- break;
- }
- }
-
-#ifndef NDEBUG
- if (retval == 0) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "getValueTypeMapEntry returns NULL for "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
- }
-#endif
+ // Byte offset of the preferred slot (counted from the MSB)
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
return retval;
}
SDValue
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
EVT ArgVT = Op.getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op.getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Op.getNode()->getValueType(0).getTypeForEVT();
+ Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
- Callee, Args, DAG,
- Op.getDebugLoc());
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op.getDebugLoc());
return CallInfo.first;
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()),
SPUTM(TM) {
- // Fold away setcc operations if possible.
- setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
// Set up the SPU's register classes:
- addRegisterClass(EVT::i8, SPU::R8CRegisterClass);
- addRegisterClass(EVT::i16, SPU::R16CRegisterClass);
- addRegisterClass(EVT::i32, SPU::R32CRegisterClass);
- addRegisterClass(EVT::i64, SPU::R64CRegisterClass);
- addRegisterClass(EVT::f32, SPU::R32FPRegisterClass);
- addRegisterClass(EVT::f64, SPU::R64FPRegisterClass);
- addRegisterClass(EVT::i128, SPU::GPRCRegisterClass);
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
// SPU has no sign or zero extended loads for i1, i8, i16:
- setLoadExtAction(ISD::EXTLOAD, EVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, EVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, EVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, EVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, EVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
- setTruncStoreAction(EVT::i128, EVT::i64, Expand);
- setTruncStoreAction(EVT::i128, EVT::i32, Expand);
- setTruncStoreAction(EVT::i128, EVT::i16, Expand);
- setTruncStoreAction(EVT::i128, EVT::i8, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
- setTruncStoreAction(EVT::f64, EVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SPU constant load actions are custom lowered:
- setOperationAction(ISD::ConstantFP, EVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, EVT::f64, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
// SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) EVT::i8; sctype < (unsigned) EVT::i128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
++sctype) {
- EVT::SimpleValueType VT = (EVT::SimpleValueType)sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
- for (unsigned stype = sctype - 1; stype >= (unsigned) EVT::i8; --stype) {
- EVT::SimpleValueType StoreVT = (EVT::SimpleValueType) stype;
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
}
}
- for (unsigned sctype = (unsigned) EVT::f32; sctype < (unsigned) EVT::f64;
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
++sctype) {
- EVT::SimpleValueType VT = (EVT::SimpleValueType) sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
- for (unsigned stype = sctype - 1; stype >= (unsigned) EVT::f32; --stype) {
- EVT::SimpleValueType StoreVT = (EVT::SimpleValueType) stype;
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
}
}
// Expand the jumptable branches
- setOperationAction(ISD::BR_JT, EVT::Other, Expand);
- setOperationAction(ISD::BR_CC, EVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// Custom lower SELECT_CC for most cases, but expand by default
- setOperationAction(ISD::SELECT_CC, EVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, EVT::i8, Custom);
- setOperationAction(ISD::SELECT_CC, EVT::i16, Custom);
- setOperationAction(ISD::SELECT_CC, EVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, EVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
// SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMBARRIER, EVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
// SPU has no division/remainder instructions
- setOperationAction(ISD::SREM, EVT::i8, Expand);
- setOperationAction(ISD::UREM, EVT::i8, Expand);
- setOperationAction(ISD::SDIV, EVT::i8, Expand);
- setOperationAction(ISD::UDIV, EVT::i8, Expand);
- setOperationAction(ISD::SDIVREM, EVT::i8, Expand);
- setOperationAction(ISD::UDIVREM, EVT::i8, Expand);
- setOperationAction(ISD::SREM, EVT::i16, Expand);
- setOperationAction(ISD::UREM, EVT::i16, Expand);
- setOperationAction(ISD::SDIV, EVT::i16, Expand);
- setOperationAction(ISD::UDIV, EVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, EVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, EVT::i16, Expand);
- setOperationAction(ISD::SREM, EVT::i32, Expand);
- setOperationAction(ISD::UREM, EVT::i32, Expand);
- setOperationAction(ISD::SDIV, EVT::i32, Expand);
- setOperationAction(ISD::UDIV, EVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, EVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, EVT::i32, Expand);
- setOperationAction(ISD::SREM, EVT::i64, Expand);
- setOperationAction(ISD::UREM, EVT::i64, Expand);
- setOperationAction(ISD::SDIV, EVT::i64, Expand);
- setOperationAction(ISD::UDIV, EVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, EVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, EVT::i64, Expand);
- setOperationAction(ISD::SREM, EVT::i128, Expand);
- setOperationAction(ISD::UREM, EVT::i128, Expand);
- setOperationAction(ISD::SDIV, EVT::i128, Expand);
- setOperationAction(ISD::UDIV, EVT::i128, Expand);
- setOperationAction(ISD::SDIVREM, EVT::i128, Expand);
- setOperationAction(ISD::UDIVREM, EVT::i128, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
// We don't support sin/cos/sqrt/fmod
- setOperationAction(ISD::FSIN , EVT::f64, Expand);
- setOperationAction(ISD::FCOS , EVT::f64, Expand);
- setOperationAction(ISD::FREM , EVT::f64, Expand);
- setOperationAction(ISD::FSIN , EVT::f32, Expand);
- setOperationAction(ISD::FCOS , EVT::f32, Expand);
- setOperationAction(ISD::FREM , EVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
// Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
// for f32!)
- setOperationAction(ISD::FSQRT, EVT::f64, Expand);
- setOperationAction(ISD::FSQRT, EVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, EVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, EVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
// SPU can do rotate right and left, so legalize it... but customize for i8
// because instructions don't exist.
// FIXME: Change from "expand" to appropriate type once ROTR is supported in
// .td files.
- setOperationAction(ISD::ROTR, EVT::i32, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, EVT::i16, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, EVT::i8, Expand /*Custom*/);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
- setOperationAction(ISD::ROTL, EVT::i32, Legal);
- setOperationAction(ISD::ROTL, EVT::i16, Legal);
- setOperationAction(ISD::ROTL, EVT::i8, Custom);
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
// SPU has no native version of shift left/right for i8
- setOperationAction(ISD::SHL, EVT::i8, Custom);
- setOperationAction(ISD::SRL, EVT::i8, Custom);
- setOperationAction(ISD::SRA, EVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
// Make these operations legal and handle them during instruction selection:
- setOperationAction(ISD::SHL, EVT::i64, Legal);
- setOperationAction(ISD::SRL, EVT::i64, Legal);
- setOperationAction(ISD::SRA, EVT::i64, Legal);
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
// Custom lower i8, i32 and i64 multiplications
- setOperationAction(ISD::MUL, EVT::i8, Custom);
- setOperationAction(ISD::MUL, EVT::i32, Legal);
- setOperationAction(ISD::MUL, EVT::i64, Legal);
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
// Expand double-width multiplication
// FIXME: It would probably be reasonable to support some of these operations
- setOperationAction(ISD::UMUL_LOHI, EVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, EVT::i8, Expand);
- setOperationAction(ISD::MULHU, EVT::i8, Expand);
- setOperationAction(ISD::MULHS, EVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, EVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, EVT::i16, Expand);
- setOperationAction(ISD::MULHU, EVT::i16, Expand);
- setOperationAction(ISD::MULHS, EVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, EVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, EVT::i32, Expand);
- setOperationAction(ISD::MULHU, EVT::i32, Expand);
- setOperationAction(ISD::MULHS, EVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, EVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, EVT::i64, Expand);
- setOperationAction(ISD::MULHU, EVT::i64, Expand);
- setOperationAction(ISD::MULHS, EVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
// Need to custom handle (some) common i8, i64 math ops
- setOperationAction(ISD::ADD, EVT::i8, Custom);
- setOperationAction(ISD::ADD, EVT::i64, Legal);
- setOperationAction(ISD::SUB, EVT::i8, Custom);
- setOperationAction(ISD::SUB, EVT::i64, Legal);
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
// SPU does not have BSWAP. It does have i32 support CTLZ.
// CTPOP has to be custom lowered.
- setOperationAction(ISD::BSWAP, EVT::i32, Expand);
- setOperationAction(ISD::BSWAP, EVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, EVT::i8, Custom);
- setOperationAction(ISD::CTPOP, EVT::i16, Custom);
- setOperationAction(ISD::CTPOP, EVT::i32, Custom);
- setOperationAction(ISD::CTPOP, EVT::i64, Custom);
- setOperationAction(ISD::CTPOP, EVT::i128, Expand);
-
- setOperationAction(ISD::CTTZ , EVT::i8, Expand);
- setOperationAction(ISD::CTTZ , EVT::i16, Expand);
- setOperationAction(ISD::CTTZ , EVT::i32, Expand);
- setOperationAction(ISD::CTTZ , EVT::i64, Expand);
- setOperationAction(ISD::CTTZ , EVT::i128, Expand);
-
- setOperationAction(ISD::CTLZ , EVT::i8, Promote);
- setOperationAction(ISD::CTLZ , EVT::i16, Promote);
- setOperationAction(ISD::CTLZ , EVT::i32, Legal);
- setOperationAction(ISD::CTLZ , EVT::i64, Expand);
- setOperationAction(ISD::CTLZ , EVT::i128, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
- setOperationAction(ISD::SELECT, EVT::i8, Legal);
- setOperationAction(ISD::SELECT, EVT::i16, Legal);
- setOperationAction(ISD::SELECT, EVT::i32, Legal);
- setOperationAction(ISD::SELECT, EVT::i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, EVT::i8, Legal);
- setOperationAction(ISD::SETCC, EVT::i16, Legal);
- setOperationAction(ISD::SETCC, EVT::i32, Legal);
- setOperationAction(ISD::SETCC, EVT::i64, Legal);
- setOperationAction(ISD::SETCC, EVT::f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
// Custom lower i128 -> i64 truncates
- setOperationAction(ISD::TRUNCATE, EVT::i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_SINT, EVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, EVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, EVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, EVT::i16, Promote);
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
// SPU has a legal FP -> signed INT instruction for f32, but for f64, need
// to expand to a libcall, hence the custom lowering:
- setOperationAction(ISD::FP_TO_SINT, EVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, EVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, EVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, EVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, EVT::i128, Expand);
- setOperationAction(ISD::FP_TO_UINT, EVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
// FDIV on SPU requires custom lowering
- setOperationAction(ISD::FDIV, EVT::f64, Expand); // to libcall
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
// SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
- setOperationAction(ISD::SINT_TO_FP, EVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, EVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, EVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, EVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, EVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, EVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, EVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, EVT::i64, Custom);
-
- setOperationAction(ISD::BIT_CONVERT, EVT::i32, Legal);
- setOperationAction(ISD::BIT_CONVERT, EVT::f32, Legal);
- setOperationAction(ISD::BIT_CONVERT, EVT::i64, Legal);
- setOperationAction(ISD::BIT_CONVERT, EVT::f64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
// We cannot sextinreg(i1). Expand to shifts.
- setOperationAction(ISD::SIGN_EXTEND_INREG, EVT::i1, Expand);
-
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, EVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, EVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) EVT::i8; sctype < (unsigned) EVT::f128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
++sctype) {
- EVT::SimpleValueType VT = (EVT::SimpleValueType)sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
}
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , EVT::Other, Custom);
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
// Use the default implementation.
- setOperationAction(ISD::VAARG , EVT::Other, Expand);
- setOperationAction(ISD::VACOPY , EVT::Other, Expand);
- setOperationAction(ISD::VAEND , EVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , EVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , EVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, EVT::i32 , Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, EVT::i64 , Expand);
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
// Cell SPU has instructions for converting between i64 and fp.
- setOperationAction(ISD::FP_TO_SINT, EVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, EVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, EVT::i32, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
- setOperationAction(ISD::BUILD_PAIR, EVT::i64, Expand);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- addRegisterClass(EVT::v16i8, SPU::VECREGRegisterClass);
- addRegisterClass(EVT::v8i16, SPU::VECREGRegisterClass);
- addRegisterClass(EVT::v4i32, SPU::VECREGRegisterClass);
- addRegisterClass(EVT::v2i64, SPU::VECREGRegisterClass);
- addRegisterClass(EVT::v4f32, SPU::VECREGRegisterClass);
- addRegisterClass(EVT::v2f64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
- // "Odd size" vector classes that we're willing to support:
- addRegisterClass(EVT::v2i32, SPU::VECREGRegisterClass);
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- for (unsigned i = (unsigned)EVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)EVT::LAST_VECTOR_VALUETYPE; ++i) {
- EVT::SimpleValueType VT = (EVT::SimpleValueType)i;
+ // Set operation actions to legal types only.
+ if (!isTypeLegal(VT)) continue;
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
// These operations need to be expanded:
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
+ // Expand all trunc stores
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
+
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
- setOperationAction(ISD::AND, EVT::v16i8, Custom);
- setOperationAction(ISD::OR, EVT::v16i8, Custom);
- setOperationAction(ISD::XOR, EVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, EVT::v4f32, Custom);
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
- setOperationAction(ISD::FDIV, EVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
- setShiftAmountType(EVT::i32);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
setStackPointerRegisterToSaveRestore(SPU::R1);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setMinFunctionAlignment(3);
+
computeRegisterProperties();
// Set pre-RA register scheduler default to BURR, which produces slightly
// better code than the default (could also be TDRR, but TargetLowering.h
// needs a mod to support that model):
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
}
const char *
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
- node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
- node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
- node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
- node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
+ node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+ node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
return ((i != node_names.end()) ? i->second : 0);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
- return 3;
-}
-
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//
-EVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i16 and i32 are valid SETCC result types
- return ((VT == EVT::i8 || VT == EVT::i16 || VT == EVT::i32) ?
- VT.getSimpleVT() :
- EVT::i32);
+EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
}
//===----------------------------------------------------------------------===//
within a 16-byte block, we have to rotate to extract the requested element.
For extending loads, we also want to ensure that the following sequence is
- emitted, e.g. for EVT::f32 extending load to EVT::f64:
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
\verbatim
%1 v16i8,ch = load
EVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+ int pso = prefslotOffset(InVT);
DebugLoc dl = Op.getDebugLoc();
-
- switch (LN->getAddressingMode()) {
- case ISD::UNINDEXED: {
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if (alignment == 16) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
-
- if (rotamt < 0)
- rotamt += 16;
-
- rotate = DAG.getConstant(rotamt, EVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -vtm->prefslot_byte;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, EVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -vtm->prefslot_byte;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+ // two sanity checks
+ assert( LN->getAddressingMode() == ISD::UNINDEXED
+ && "we should get only UNINDEXED adresses");
+ // clean aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
- DAG.getConstant(0, PtrVT));
+ DAG.getConstant((offset & ~0xf), PtrVT));
}
-
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
// Offset the rotate amount by the basePtr and the preferred slot
// byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
basePtr,
- DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+ DAG.getConstant(rotamt, PtrVT));
}
-
- // Re-emit as a v16i8 vector load
- result = DAG.getLoad(EVT::v16i8, dl, the_chain, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), 16);
-
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
+
+ // Do the load as a i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), false, 16);
+
+ // When the size is not greater than alignment we get all data with just
+ // one load
+ if (alignment >= InVT.getSizeInBits()/8) {
// Update the chain
- the_chain = result.getValue(1);
+ the_chain = low.getValue(1);
// Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, EVT::v16i8,
- result.getValue(0), rotate);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
- EVT vecVT = EVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+ // When alignment is less than the size, we might need (known only at
+ // run-time) two loads
+ // TODO: if the memory address is composed only from constants, we have
+ // extra kowledge, and might avoid the second load
+ else {
+ // storage position offset from lower 16 byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+ // get a registerfull of ones. (this implementation is a workaround: LLVM
+ // cannot handle 128 bit signed int constants)
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), false,
+ 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+ // Shift the (possible) high part right to compensate the misalignemnt.
+ // if there is no highpart (i.e. value is i64 and offset is 4), this
+ // will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+ // Shift the low similarly
+ // TODO: add SPUISD::SHL_BYTES
+ low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+ }
// Handle extending loads by extending the scalar result:
if (ExtType == ISD::SEXTLOAD) {
result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
result = DAG.getNode(NewOpc, dl, OutVT, result);
}
- SDVTList retvts = DAG.getVTList(OutVT, EVT::Other);
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
SDValue retops[2] = {
result,
the_chain
result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
retops, sizeof(retops) / sizeof(retops[0]));
return result;
- }
- case ISD::PRE_INC:
- case ISD::PRE_DEC:
- case ISD::POST_INC:
- case ISD::POST_DEC:
- case ISD::LAST_INDEXED_MODE:
- {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
- "UNINDEXED\n";
- Msg << (unsigned) LN->getAddressingMode();
- llvm_report_error(Msg.str());
- /*NOTREACHED*/
- }
- }
-
- return SDValue();
}
/// Custom lower stores for CellSPU
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+ // two sanity checks
+ assert( SN->getAddressingMode() == ISD::UNINDEXED
+ && "we should get only UNINDEXED adresses");
+ // clean aligned loads can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
- switch (SN->getAddressingMode()) {
- case ISD::UNINDEXED: {
- // The vector type we really want to load from the 16-byte chunk.
- EVT vecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
- stVecVT = EVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if (alignment == 16) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
+ if ((offset & ~0xf) > 0) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
- DAG.getConstant(0, PtrVT));
+ DAG.getConstant((offset & ~0xf), PtrVT));
}
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- // Re-emit as a v16i8 vector load
- alignLoadVec = DAG.getLoad(EVT::v16i8, dl, the_chain, basePtr,
- SN->getSrcValue(), SN->getSrcValueOffset(),
- SN->isVolatile(), 16);
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
+ // if we don't need to store over the 16 byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
// Update the chain
- the_chain = alignLoadVec.getValue(1);
+ the_chain = low.getValue(1);
- LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ LoadSDNode *LN = cast<LoadSDNode>(low);
SDValue theValue = SN->getValue();
- SDValue result;
if (StVT != VT
&& (theValue.getOpcode() == ISD::AssertZext
// to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "CellSPU LowerSTORE: basePtr = ";
+ errs() << "CellSPU LowerSTORE: basePtr = ";
basePtr.getNode()->dump(&DAG);
- cerr << "\n";
+ errs() << "\n";
}
#endif
- SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- SDValue vectorizeOp =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, alignLoadVec,
- DAG.getNode(ISD::BIT_CONVERT, dl,
- EVT::v4i32, insertEltOp));
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, dl, result, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), LN->getAlignment());
-
-#if 0 && !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- const SDValue ¤tRoot = DAG.getRoot();
-
- DAG.setRoot(result);
- cerr << "------- CellSPU:LowerStore result:\n";
- DAG.dump();
- cerr << "-------\n";
- DAG.setRoot(currentRoot);
- }
-#endif
-
- return result;
- /*UNREACHED*/
- }
- case ISD::PRE_INC:
- case ISD::PRE_DEC:
- case ISD::POST_INC:
- case ISD::POST_DEC:
- case ISD::LAST_INDEXED_MODE:
- {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
- "UNINDEXED\n";
- Msg << (unsigned) SN->getAddressingMode();
- llvm_report_error(Msg.str());
- /*NOTREACHED*/
- }
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(),
+ 16);
+
+ }
+ // do the store when it might cross the 16 byte memory access boundary.
+ else {
+ // TODO issue a warning if SN->isVolatile()== true? This is likely not
+ // what the user wanted.
+
+ // address offset from nearest lower 16byte alinged address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+ // get a registerfull of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ // Create the 128 bit masks that have ones where the data to store is
+ // located.
+ SDValue lowmask, himask;
+ // if the value to store don't fill up the an entire 128 bits, zero
+ // out the last bits of the mask so that only the value we want to store
+ // is masked.
+ // this is e.g. in the case of store i32, align 2
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+ // this will zero, if there are no data that goes to the high quad
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+ // Merge the old data and the new data and store the results
+ // Need to convert vectors here to integer as 'OR'ing floats assert
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
}
- return SDValue();
+ return result;
}
//! Generate the address of a constant pool entry.
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- Constant *C = CP->getConstVal();
+ const Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
llvm_unreachable("LowerConstantPool: Relocation model other than static"
" not supported.");
- return SDValue();
}
//! Alternate entry point for generating the address of a constant pool entry
llvm_unreachable("LowerJumpTable: Relocation model other than static"
" not supported.");
- return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there is no actual debug info here
return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
} else {
- llvm_report_error("LowerGlobalAddress: Relocation model other than static"
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
"not supported.");
/*NOTREACHED*/
}
-
- return SDValue();
}
//! Custom lower double precision floating point constants
// FIXME there is no actual debug info here
DebugLoc dl = Op.getDebugLoc();
- if (VT == EVT::f64) {
+ if (VT == MVT::f64) {
ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
assert((FP != 0) &&
"LowerConstantFP: Node is not ConstantFPSDNode");
uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
- SDValue T = DAG.getConstant(dbits, EVT::i64);
- SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v2i64, T, T);
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v2f64, Tvec));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
}
return SDValue();
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals)
+ const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
-
- unsigned ArgOffset = SPUFrameInfo::minStackSize();
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
// Add DAG nodes to load the arguments or copy them out of registers.
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
- if (ArgRegIdx < NumArgRegs) {
+ if (VA.isRegLoc()) {
const TargetRegisterClass *ArgRegClass;
- switch (ObjectVT.getSimpleVT()) {
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "LowerFormalArguments Unhandled argument type: "
- << ObjectVT.getEVTString();
- llvm_report_error(Msg.str());
- }
- case EVT::i8:
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
+ case MVT::i8:
ArgRegClass = &SPU::R8CRegClass;
break;
- case EVT::i16:
+ case MVT::i16:
ArgRegClass = &SPU::R16CRegClass;
break;
- case EVT::i32:
+ case MVT::i32:
ArgRegClass = &SPU::R32CRegClass;
break;
- case EVT::i64:
+ case MVT::i64:
ArgRegClass = &SPU::R64CRegClass;
break;
- case EVT::i128:
+ case MVT::i128:
ArgRegClass = &SPU::GPRCRegClass;
break;
- case EVT::f32:
+ case MVT::f32:
ArgRegClass = &SPU::R32FPRegClass;
break;
- case EVT::f64:
+ case MVT::f64:
ArgRegClass = &SPU::R64FPRegClass;
break;
- case EVT::v2f64:
- case EVT::v4f32:
- case EVT::v2i64:
- case EVT::v4i32:
- case EVT::v8i16:
- case EVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
ArgRegClass = &SPU::VECREGRegClass;
break;
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++ArgRegIdx;
} else {
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
ArgOffset += StackSlotSize;
}
// vararg handling:
if (isVarArg) {
- // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const uint16_t ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+ // size of ArgRegs array
+ const unsigned NumArgRegs = 77;
+
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
// Create the frame slot
-
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
- SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
- SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], EVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
+ false, false, 0);
Chain = Store.getOperand(0);
MemOps.push_back(Store);
ArgOffset += StackSlotSize;
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, EVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
}
(Addr << 14 >> 14) != Addr)
return 0; // Top 14 bits have to be sext of immediate.
- return DAG.getConstant((int)C->getZExtValue() >> 2, EVT::i32).getNode();
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- unsigned CallConv, bool isVarArg,
- bool isTailCall,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) {
+ SmallVectorImpl<SDValue> &InVals) const {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
// Handy pointer type
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area, and parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP]
- unsigned NumStackBytes = SPUFrameInfo::minStackSize();
-
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
- SDValue StackPtr = DAG.getRegister(SPU::R1, EVT::i32);
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
// Figure out which arguments are going to go in registers, and which in
// memory.
- unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
unsigned ArgRegIdx = 0;
// Keep track of registers passing arguments
// And the arguments passed on the stack
SmallVector<SDValue, 8> MemOpChains;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- switch (Arg.getValueType().getSimpleVT()) {
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
- case EVT::i8:
- case EVT::i16:
- case EVT::i32:
- case EVT::i64:
- case EVT::i128:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
- ArgOffset += StackSlotSize;
- }
- break;
- case EVT::f32:
- case EVT::f64:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
- ArgOffset += StackSlotSize;
- }
- break;
- case EVT::v2i64:
- case EVT::v2f64:
- case EVT::v4f32:
- case EVT::v4i32:
- case EVT::v8i16:
- case EVT::v16i8:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
ArgOffset += StackSlotSize;
}
break;
}
}
- // Update number of stack bytes actually used, insert a call sequence start
- NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
+
+ // Insert a call sequence start
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
true));
if (!MemOpChains.empty()) {
// Adjust the stack pointer for the stack arguments.
- Chain = DAG.getNode(ISD::TokenFactor, dl, EVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
}
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- GlobalValue *GV = G->getGlobal();
+ const GlobalValue *GV = G->getGlobal();
EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
if (!ST->usingLargeMem()) {
// Turn calls to targets that are defined (i.e., have bodies) into BRSL
if (InFlag.getNode())
Ops.push_back(InFlag);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(EVT::Other, EVT::Flag),
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
&Ops[0], Ops.size());
InFlag = Chain.getValue(1);
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT()) {
- default: llvm_unreachable("Unexpected ret value!");
- case EVT::Other: break;
- case EVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == EVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- EVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case EVT::i64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i64,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- case EVT::i128:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i128,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- case EVT::f32:
- case EVT::f64:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- case EVT::v2f64:
- case EVT::v2i64:
- case EVT::v4f32:
- case EVT::v4i32:
- case EVT::v8i16:
- case EVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- DebugLoc dl, SelectionDAG &DAG) {
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
// If this is the first return lowered for this function, add the regs to the
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
Flag = Chain.getValue(1);
}
if (Flag.getNode())
- return DAG.getNode(SPUISD::RET_FLAG, dl, EVT::Other, Chain, Flag);
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
else
- return DAG.getNode(SPUISD::RET_FLAG, dl, EVT::Other, Chain);
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
uint64_t Value = CN->getZExtValue();
- if (ValueType == EVT::i64) {
+ if (ValueType == MVT::i64) {
uint64_t UValue = CN->getZExtValue();
uint32_t upper = uint32_t(UValue >> 32);
uint32_t lower = uint32_t(UValue);
EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int64_t Value = CN->getSExtValue();
- if (ValueType == EVT::i64) {
+ if (ValueType == MVT::i64) {
uint64_t UValue = CN->getZExtValue();
uint32_t upper = uint32_t(UValue >> 32);
uint32_t lower = uint32_t(UValue);
EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int64_t Value = CN->getSExtValue();
- if (ValueType == EVT::i64) {
+ if (ValueType == MVT::i64) {
uint64_t UValue = CN->getZExtValue();
uint32_t upper = uint32_t(UValue >> 32);
uint32_t lower = uint32_t(UValue);
return SDValue();
Value = Value >> 32;
}
- if (isS10Constant(Value))
+ if (isInt<10>(Value))
return DAG.getTargetConstant(Value, ValueType);
}
EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int Value = (int) CN->getZExtValue();
- if (ValueType == EVT::i16
+ if (ValueType == MVT::i16
&& Value <= 0xffff /* truncated from uint64_t */
&& ((short) Value >> 8) == ((short) Value & 0xff))
return DAG.getTargetConstant(Value & 0xff, ValueType);
- else if (ValueType == EVT::i8
+ else if (ValueType == MVT::i8
&& (Value & 0xff) == Value)
return DAG.getTargetConstant(Value, ValueType);
}
EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
uint64_t Value = CN->getZExtValue();
- if ((ValueType == EVT::i32
+ if ((ValueType == MVT::i32
&& ((unsigned) Value & 0xffff0000) == (unsigned) Value)
- || (ValueType == EVT::i64 && (Value & 0xffff0000) == Value))
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
return DAG.getTargetConstant(Value >> 16, ValueType);
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant((unsigned) CN->getZExtValue(), EVT::i32);
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
}
return SDValue();
/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant((unsigned) CN->getZExtValue(), EVT::i64);
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
}
return SDValue();
uint64_t SplatBits = APSplatBits.getZExtValue();
- switch (VT.getSimpleVT()) {
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
- << VT.getEVTString();
- llvm_report_error(Msg.str());
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
/*NOTREACHED*/
- }
- case EVT::v4f32: {
+ case MVT::v4f32: {
uint32_t Value32 = uint32_t(SplatBits);
assert(SplatBitSize == 32
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(Value32, EVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v4f32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32, T,T,T,T));
- break;
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
}
- case EVT::v2f64: {
+ case MVT::v2f64: {
uint64_t f64val = uint64_t(SplatBits);
assert(SplatBitSize == 64
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(f64val, EVT::i64);
- return DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v2f64,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v2i64, T, T));
- break;
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
}
- case EVT::v16i8: {
+ case MVT::v16i8: {
// 8-bit constants have to be expanded to 16-bits
unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
SmallVector<SDValue, 8> Ops;
- Ops.assign(8, DAG.getConstant(Value16, EVT::i16));
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v8i16, &Ops[0], Ops.size()));
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
}
- case EVT::v8i16: {
+ case MVT::v8i16: {
unsigned short Value16 = SplatBits;
SDValue T = DAG.getConstant(Value16, EltVT);
SmallVector<SDValue, 8> Ops;
Ops.assign(8, T);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
}
- case EVT::v4i32: {
+ case MVT::v4i32: {
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
- case EVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
- }
- case EVT::v2i64: {
+ case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
}
-
- return SDValue();
}
/*!
if (upper == lower) {
// Magic constant that can be matched by IL, ILA, et. al.
- SDValue Val = DAG.getTargetConstant(upper, EVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Val, Val, Val, Val));
} else {
bool upper_special, lower_special;
// Both upper and lower are special, lower to a constant pool load:
if (lower_special && upper_special) {
- SDValue SplatValCN = DAG.getConstant(SplatVal, EVT::i64);
- return DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v2i64,
- SplatValCN, SplatValCN);
+ SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
+ SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ UpperVal, LowerVal, UpperVal, LowerVal);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
}
SDValue LO32;
// Create lower vector if not a special pattern
if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, EVT::i32);
- LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
LO32C, LO32C, LO32C, LO32C));
}
// Create upper vector if not a special pattern
if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, EVT::i32);
- HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
HI32C, HI32C, HI32C, HI32C));
}
val |= i * 4 + j + ((i & 1) * 16);
}
- ShufBytes.push_back(DAG.getConstant(val, EVT::i32));
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
}
}
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
unsigned PrevElt = 0;
- unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+ int rotamt=0;
+ EVT maskVT; // which of the c?d instructions to use
- if (EltVT == EVT::i8) {
+ if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
- } else if (EltVT == EVT::i16) {
+ maskVT = MVT::v16i8;
+ } else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
- } else if (EltVT == EVT::i32 || EltVT == EVT::f32) {
+ maskVT = MVT::v8i16;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
- } else if (EltVT == EVT::i64 || EltVT == EVT::f64) {
+ maskVT = MVT::v4i32;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
} else
llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
for (unsigned i = 0; i != MaxElts; ++i) {
if (SVN->getMaskElt(i) < 0)
continue;
-
+
unsigned SrcElt = SVN->getMaskElt(i);
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+ // elements are taken form V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
PrevElt = SrcElt;
- if (SrcElt == 0)
- V0Elt = i;
} else {
rotate = false;
}
- } else if (PrevElt == 0) {
- // First time through, need to keep track of previous element
+ } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
+ // First time or after a "wrap around"
+ rotamt = SrcElt-i;
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
if (EltsFromV2 == 1 && monotonic) {
// Compute mask and shuffle
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Initialize temporary register to 0
- SDValue InitTempReg =
- DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
- // Copy register's contents as index in SHUFFLE_MASK:
- SDValue ShufMaskOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, EVT::v4i32,
- DAG.getTargetConstant(V2Elt, EVT::i32),
- DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+
+ // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
+ // R1 ($sp) is used here only as it is guaranteed to have last bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2EltOffset, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
} else if (rotate) {
- int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+ if (rotamt < 0)
+ rotamt +=MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
- V1, DAG.getConstant(rotamt, EVT::i16));
+ V1, DAG.getConstant(rotamt, MVT::i16));
} else {
// Convert the SHUFFLE_VECTOR mask's input element units to the
// actual bytes.
unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
for (unsigned j = 0; j < BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,EVT::i8));
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
-
- SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v16i8,
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
}
size_t n_copies;
// Create a constant vector:
- switch (Op.getValueType().getSimpleVT()) {
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected constant value type in "
"LowerSCALAR_TO_VECTOR");
- case EVT::v16i8: n_copies = 16; VT = EVT::i8; break;
- case EVT::v8i16: n_copies = 8; VT = EVT::i16; break;
- case EVT::v4i32: n_copies = 4; VT = EVT::i32; break;
- case EVT::v4f32: n_copies = 4; VT = EVT::f32; break;
- case EVT::v2i64: n_copies = 2; VT = EVT::i64; break;
- case EVT::v2f64: n_copies = 2; VT = EVT::f64; break;
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
}
SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
&ConstVecValues[0], ConstVecValues.size());
} else {
// Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT()) {
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
- case EVT::i8:
- case EVT::i16:
- case EVT::i32:
- case EVT::i64:
- case EVT::f32:
- case EVT::f64:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
}
}
-
- return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
int EltNo = (int) C->getZExtValue();
// sanity checks:
- if (VT == EVT::i8 && EltNo >= 16)
+ if (VT == MVT::i8 && EltNo >= 16)
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == EVT::i16 && EltNo >= 8)
+ else if (VT == MVT::i16 && EltNo >= 8)
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == EVT::i32 && EltNo >= 4)
+ else if (VT == MVT::i32 && EltNo >= 4)
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
- else if (VT == EVT::i64 && EltNo >= 2)
+ else if (VT == MVT::i64 && EltNo >= 2)
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
- if (EltNo == 0 && (VT == EVT::i32 || VT == EVT::i64)) {
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
// i32 and i64: Element 0 is the preferred slot
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
}
int prefslot_begin = -1, prefslot_end = -1;
int elt_byte = EltNo * VT.getSizeInBits() / 8;
- switch (VT.getSimpleVT()) {
- default:
- assert(false && "Invalid value type!");
- case EVT::i8: {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid value type!");
+ case MVT::i8: {
prefslot_begin = prefslot_end = 3;
break;
}
- case EVT::i16: {
+ case MVT::i16: {
prefslot_begin = 2; prefslot_end = 3;
break;
}
- case EVT::i32:
- case EVT::f32: {
+ case MVT::i32:
+ case MVT::f32: {
prefslot_begin = 0; prefslot_end = 3;
break;
}
- case EVT::i64:
- case EVT::f64: {
+ case MVT::i64:
+ case MVT::f64: {
prefslot_begin = 0; prefslot_end = 7;
break;
}
assert(prefslot_begin != -1 && prefslot_end != -1 &&
"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
- unsigned int ShufBytes[16];
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
for (int i = 0; i < 16; ++i) {
// zero fill uppper part of preferred slot, don't care about the
// other slots:
(ShufBytes[bidx+1] << 16) |
(ShufBytes[bidx+2] << 8) |
ShufBytes[bidx+3]);
- ShufMask[i] = DAG.getConstant(bits, EVT::i32);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
}
SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
&ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
- llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
// Make life easier by making sure the index is zero-extended to i32
- if (Elt.getValueType() != EVT::i32)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, EVT::i32, Elt);
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
// Scale the index to a bit/byte shift quantity
APInt scaleFactor =
if (scaleShift > 0) {
// Scale the shift factor:
- Elt = DAG.getNode(ISD::SHL, dl, EVT::i32, Elt,
- DAG.getConstant(scaleShift, EVT::i32));
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
}
- vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
// Replicate the bytes starting at byte 0 across the entire vector (for
// consistency with the notion of a unified register set)
SDValue replicate;
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
default:
- llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
"type");
/*NOTREACHED*/
- case EVT::i8: {
- SDValue factor = DAG.getConstant(0x00000000, EVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
factor, factor, factor, factor);
break;
}
- case EVT::i16: {
- SDValue factor = DAG.getConstant(0x00010001, EVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
factor, factor, factor, factor);
break;
}
- case EVT::i32:
- case EVT::f32: {
- SDValue factor = DAG.getConstant(0x00010203, EVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
factor, factor, factor, factor);
break;
}
- case EVT::i64:
- case EVT::f64: {
- SDValue loFactor = DAG.getConstant(0x00010203, EVT::i32);
- SDValue hiFactor = DAG.getConstant(0x04050607, EVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
loFactor, hiFactor, loFactor, hiFactor);
break;
}
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ // use 0 when the lane to insert to is 'undef'
+ int64_t Offset=0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
+ }
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(CN->getSExtValue(), PtrVT));
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, dl, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
VecOp,
- DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v4i32, ShufMask));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
return result;
}
{
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
DebugLoc dl = Op.getDebugLoc();
- EVT ShiftVT = TLI.getShiftAmountTy();
+ EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
- assert(Op.getValueType() == EVT::i8);
+ assert(Op.getValueType() == MVT::i8);
switch (Opc) {
default:
llvm_unreachable("Unhandled i8 math operator");
- /*NOTREACHED*/
- break;
case ISD::ADD: {
// 8-bit addition: Promote the arguments up to 16-bits and truncate
// the result:
SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
- DAG.getNode(Opc, dl, EVT::i16, N0, N1));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// the result:
SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
- DAG.getNode(Opc, dl, EVT::i16, N0, N1));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::ROTR:
case ISD::ROTL: {
SDValue N1 = Op.getOperand(1);
EVT N1VT = N1.getValueType();
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, EVT::i16, N0);
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
// Replicate lower 8-bits into upper 8:
SDValue ExpandArg =
- DAG.getNode(ISD::OR, dl, EVT::i16, N0,
- DAG.getNode(ISD::SHL, dl, EVT::i16,
- N0, DAG.getConstant(8, EVT::i32)));
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
// Truncate back down to i8
- return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
- DAG.getNode(Opc, dl, EVT::i16, ExpandArg, N1));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
}
case ISD::SRL:
case ISD::SHL: {
SDValue N1 = Op.getOperand(1);
EVT N1VT = N1.getValueType();
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, EVT::i16, N0);
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
unsigned N1Opc = ISD::ZERO_EXTEND;
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
}
- return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
- DAG.getNode(Opc, dl, EVT::i16, N0, N1));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::SRA: {
SDValue N1 = Op.getOperand(1);
EVT N1VT = N1.getValueType();
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
unsigned N1Opc = ISD::SIGN_EXTEND;
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
}
- return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
- DAG.getNode(Opc, dl, EVT::i16, N0, N1));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::MUL: {
SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
- DAG.getNode(Opc, dl, EVT::i16, N0, N1));
- break;
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
}
-
- return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
ConstVec = Op.getOperand(0);
Arg = Op.getOperand(1);
if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
ConstVec = ConstVec.getOperand(0);
} else {
ConstVec = Op.getOperand(1);
Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
ConstVec = ConstVec.getOperand(0);
}
}
HasAnyUndefs, minSplatBits)
&& minSplatBits <= SplatBitSize) {
uint64_t SplatBits = APSplatBits.getZExtValue();
- SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, EVT::i8);
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
SmallVector<SDValue, 16> tcVec;
tcVec.assign(16, tc);
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
- switch (VT.getSimpleVT()) {
- default:
- assert(false && "Invalid value type!");
- case EVT::i8: {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid value type!");
+ case MVT::i8: {
SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, EVT::i32);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EVT::i8, CNTB, Elt0);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
}
- case EVT::i16: {
+ case MVT::i16: {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, EVT::i16);
- SDValue Mask0 = DAG.getConstant(0x0f, EVT::i16);
- SDValue Shift1 = DAG.getConstant(8, EVT::i32);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
// CNTB_reg, SUM1_reg become associated:
SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EVT::i16, CNTB, Elt0);
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
SDValue CNTB_rescopy =
DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
- SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, EVT::i16);
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
- return DAG.getNode(ISD::AND, dl, EVT::i16,
- DAG.getNode(ISD::ADD, dl, EVT::i16,
- DAG.getNode(ISD::SRL, dl, EVT::i16,
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
Tmp1, Shift1),
Tmp1),
Mask0);
}
- case EVT::i32: {
+ case MVT::i32: {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, EVT::i32);
- SDValue Mask0 = DAG.getConstant(0xff, EVT::i32);
- SDValue Shift1 = DAG.getConstant(16, EVT::i32);
- SDValue Shift2 = DAG.getConstant(8, EVT::i32);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
// CNTB_reg, SUM1_reg become associated:
SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EVT::i32, CNTB, Elt0);
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
SDValue CNTB_rescopy =
DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
SDValue Comp1 =
- DAG.getNode(ISD::SRL, dl, EVT::i32,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, EVT::i32),
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
Shift1);
SDValue Sum1 =
- DAG.getNode(ISD::ADD, dl, EVT::i32, Comp1,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, EVT::i32));
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
SDValue Sum1_rescopy =
DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
SDValue Comp2 =
- DAG.getNode(ISD::SRL, dl, EVT::i32,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, EVT::i32),
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
Shift2);
SDValue Sum2 =
- DAG.getNode(ISD::ADD, dl, EVT::i32, Comp2,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, EVT::i32));
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
- return DAG.getNode(ISD::AND, dl, EVT::i32, Sum2, Mask0);
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
}
- case EVT::i64:
+ case MVT::i64:
break;
}
All conversions to i64 are expanded to a libcall.
*/
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- SPUTargetLowering &TLI) {
+ const SPUTargetLowering &TLI) {
EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if ((OpVT == EVT::i32 && Op0VT == EVT::f64)
- || OpVT == EVT::i64) {
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
// Convert f32 / f64 to i32 / i64 via libcall.
RTLIB::Libcall LC =
(Op.getOpcode() == ISD::FP_TO_SINT)
All conversions from i64 are expanded to a libcall.
*/
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- SPUTargetLowering &TLI) {
+ const SPUTargetLowering &TLI) {
EVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if ((OpVT == EVT::f64 && Op0VT == EVT::i32)
- || Op0VT == EVT::i64) {
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
// Convert i32, i64 to f64 via libcall:
RTLIB::Libcall LC =
(Op.getOpcode() == ISD::SINT_TO_FP)
//! Lower ISD::SETCC
/*!
- This handles EVT::f64 (double floating point) condition lowering
+ This handles MVT::f64 (double floating point) condition lowering
*/
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
EVT lhsVT = lhs.getValueType();
- assert(lhsVT == EVT::f64 && "LowerSETCC: type other than EVT::64\n");
+ assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- EVT IntVT(EVT::i64);
+ EVT IntVT(MVT::i64);
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
// selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
SDValue lhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, EVT::i32,
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, IntVT,
- i64lhs, DAG.getConstant(32, EVT::i32)));
+ i64lhs, DAG.getConstant(32, MVT::i32)));
SDValue lhsHi32abs =
- DAG.getNode(ISD::AND, dl, EVT::i32,
- lhsHi32, DAG.getConstant(0x7fffffff, EVT::i32));
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
SDValue lhsLo32 =
- DAG.getNode(ISD::TRUNCATE, dl, EVT::i32, i64lhs);
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
// SETO and SETUO only use the lhs operand:
if (CC->get() == ISD::SETO) {
return DAG.getNode(ISD::AND, dl, ccResultVT,
DAG.getSetCC(dl, ccResultVT,
lhsHi32abs,
- DAG.getConstant(0x7ff00000, EVT::i32),
+ DAG.getConstant(0x7ff00000, MVT::i32),
ISD::SETGE),
DAG.getSetCC(dl, ccResultVT,
lhsLo32,
- DAG.getConstant(0, EVT::i32),
+ DAG.getConstant(0, MVT::i32),
ISD::SETGT));
}
- SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
SDValue rhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, EVT::i32,
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, IntVT,
- i64rhs, DAG.getConstant(32, EVT::i32)));
+ i64rhs, DAG.getConstant(32, MVT::i32)));
// If a value is negative, subtract from the sign magnitude constant:
SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
// Convert the sign-magnitude representation into 2's complement:
SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- lhsHi32, DAG.getConstant(31, EVT::i32));
+ lhsHi32, DAG.getConstant(31, MVT::i32));
SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
SDValue lhsSelect =
DAG.getNode(ISD::SELECT, dl, IntVT,
lhsSelectMask, lhsSignMag2TC, i64lhs);
SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- rhsHi32, DAG.getConstant(31, EVT::i32));
+ rhsHi32, DAG.getConstant(31, MVT::i32));
SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
SDValue rhsSelect =
DAG.getNode(ISD::SELECT, dl, IntVT,
case ISD::SETONE:
compareOp = ISD::SETNE; break;
default:
- llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
}
SDValue result =
if ((CC->get() & 0x8) == 0) {
// Ordered comparison:
SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, EVT::f64),
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
ISD::SETO);
SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
- rhs, DAG.getConstantFP(0.0, EVT::f64),
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
ISD::SETO);
SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
{
// Type to truncate to
EVT VT = Op.getValueType();
- EVT::SimpleValueType simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ MVT simpleVT = VT.getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
// Type to truncate from
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if (Op0VT.getSimpleVT() == EVT::i128 && simpleVT == EVT::i64) {
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
// Create shuffle mask, least significant doubleword of quadword
unsigned maskHigh = 0x08090a0b;
unsigned maskLow = 0x0c0d0e0f;
// Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
- DAG.getConstant(maskHigh, EVT::i32),
- DAG.getConstant(maskLow, EVT::i32),
- DAG.getConstant(maskHigh, EVT::i32),
- DAG.getConstant(maskLow, EVT::i32));
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
Op0, Op0, shufMask);
return SDValue(); // Leave the truncate unmolested
}
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
+ // The type to extend to needs to be a i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+ (void)OpVT;
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+ // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
+}
+
//! Custom (target-specific) lowering entry point
/*!
This is where LLVM's DAG selection process calls to do target-specific
lowering of nodes.
*/
SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
unsigned Opc = (unsigned) Op.getOpcode();
EVT VT = Op.getValueType();
switch (Opc) {
default: {
#ifndef NDEBUG
- cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
Op.getNode()->dump();
#endif
llvm_unreachable(0);
case ISD::SRL:
case ISD::SHL:
case ISD::SRA: {
- if (VT == EVT::i8)
+ if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
break;
}
// Vector and i8 multiply:
case ISD::MUL:
- if (VT == EVT::i8)
+ if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
case ISD::CTPOP:
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
}
return SDValue();
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG)
+ SelectionDAG &DAG) const
{
#if 0
unsigned Opc = (unsigned) N->getOpcode();
switch (Opc) {
default: {
- cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
N->dump();
abort();
/*NOTREACHED*/
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
<< "With: (SPUindirect <arg>, <arg>)\n";
}
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
<< "), " << CN0->getSExtValue() << ")\n"
<< "With: (SPUindirect <arg>, "
// Types must match, however...
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\nReplace: ";
+ errs() << "\nReplace: ";
N->dump(&DAG);
- cerr << "\nWith: ";
+ errs() << "\nWith: ";
Op0.getNode()->dump(&DAG);
- cerr << "\n";
+ errs() << "\n";
}
#endif
case SPUISD::IndirectAddr: {
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->getZExtValue() == 0) {
+ if (CN != 0 && CN->isNullValue()) {
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
- DEBUG(cerr << "Replace: ");
+ DEBUG(errs() << "Replace: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
return Op0;
}
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
<< "With: (SPUindirect <arg>, <arg>)\n";
}
}
break;
}
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
// Otherwise, return unchanged.
#ifndef NDEBUG
if (Result.getNode()) {
- DEBUG(cerr << "\nReplace.SPU: ");
+ DEBUG(errs() << "\nReplace.SPU: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
}
#endif
return TargetLowering::getConstraintType(ConstraintLetter);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ //FIXME: Seems like the supported constraint letters were just copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // I'm leaving it so until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const
switch (Constraint[0]) {
case 'b': // R1-R31
case 'r': // R0-R31
- if (VT == EVT::i64)
+ if (VT == MVT::i64)
return std::make_pair(0U, SPU::R64CRegisterClass);
return std::make_pair(0U, SPU::R32CRegisterClass);
case 'f':
- if (VT == EVT::f32)
+ if (VT == MVT::f32)
return std::make_pair(0U, SPU::R32FPRegisterClass);
- else if (VT == EVT::f64)
+ else if (VT == MVT::f64)
return std::make_pair(0U, SPU::R64FPRegisterClass);
break;
case 'v':
case SPUISD::VEC2PREFSLOT:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
case SPUISD::VEC_ROTL:
case SPUISD::VEC_ROTR:
case SPUISD::ROTBYTES_LEFT:
case ISD::SETCC: {
EVT VT = Op.getValueType();
- if (VT != EVT::i8 && VT != EVT::i16 && VT != EVT::i32) {
- VT = EVT::i32;
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
}
return VT.getSizeInBits();
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- char ConstraintLetter,
- bool hasMemory,
+ std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
- Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- const Type *Ty) const {
+ Type *Ty) const {
// SPU's addresses are 256K:
return (V > -(1 << 18) && V < (1 << 18) - 1);
}
// The SPU target isn't yet aware of offsets.
return false;
}
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ //ceqi, cgti, etc. all take s10 operand
+ return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type * ) const{
+
+ // A-form: 18bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+ // D-form: reg + 14bit offset
+ if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg+reg
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
+ return true;
+
+ return false;
+}