X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FCellSPU%2FSPUISelLowering.cpp;h=286e9ab837f0e37a41d27063ac4f1d73a7c3cacb;hb=1bd7335a17010bd4d8f86736cf73cac9f3fb80a5;hp=a08ee876f873f21f825988d231e857b6645e1f2d;hpb=d021901b097f74ef1e14f2547884acfbbb4a000c;p=oota-llvm.git diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index a08ee876f87..286e9ab837f 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1,5 +1,5 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -15,20 +15,23 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" -#include "llvm/ADT/VectorExtras.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" - +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -37,10 +40,10 @@ using namespace llvm; namespace { std::map node_names; - //! MVT mapping to useful data for Cell SPU + //! EVT mapping to useful data for Cell SPU struct valtype_map_s { - const MVT valtype; - const int prefslot_byte; + EVT valtype; + int prefslot_byte; }; const valtype_map_s valtype_map[] = { @@ -56,7 +59,7 @@ namespace { const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); - const valtype_map_s *getValueTypeMapEntry(MVT VT) { + const valtype_map_s *getValueTypeMapEntry(EVT VT) { const valtype_map_s *retval = 0; for (size_t i = 0; i < n_valtype_map; ++i) { @@ -68,52 +71,59 @@ namespace { #ifndef NDEBUG if (retval == 0) { - cerr << "getValueTypeMapEntry returns NULL for " - << VT.getMVTString() - << "\n"; - abort(); + report_fatal_error("getValueTypeMapEntry returns NULL for " + + Twine(VT.getEVTString())); } #endif return retval; } - //! Predicate that returns true if operand is a memory target + //! Expand a library call into an actual call DAG node /*! - \arg Op Operand to test - \return true if the operand is a memory target (i.e., global - address, external symbol, constant pool) or an A-form - address. + \note + This code is taken from SelectionDAGLegalize, since it is not exposed as + part of the LLVM SelectionDAG API. */ - bool isMemoryOperand(const SDValue &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::GlobalAddress - || Opc == ISD::GlobalTLSAddress - || Opc == ISD::JumpTable - || Opc == ISD::ConstantPool - || Opc == ISD::ExternalSymbol - || Opc == ISD::TargetGlobalAddress - || Opc == ISD::TargetGlobalTLSAddress - || Opc == ISD::TargetJumpTable - || Opc == ISD::TargetConstantPool - || Opc == ISD::TargetExternalSymbol - || Opc == SPUISD::AFormAddr); - } - - //! 
Predicate that returns true if the operand is an indirect target - bool isIndirectOperand(const SDValue &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::Register - || Opc == SPUISD::LDRESULT); + + SDValue + ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, + bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = + Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op.getDebugLoc()); + + return CallInfo.first; } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM), - SPUTM(TM) -{ + : TargetLowering(TM, new TargetLoweringObjectFileELF()), + SPUTM(TM) { // Fold away setcc operations if possible. setPow2DivIsCheap(); @@ -121,6 +131,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); + // Set RTLIB libcall names as used by SPU: + setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); + // Set up the SPU's register classes: addRegisterClass(MVT::i8, SPU::R8CRegisterClass); addRegisterClass(MVT::i16, SPU::R16CRegisterClass); @@ -130,45 +143,54 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); - // Initialize libcalls: - setLibcallName(RTLIB::MUL_I64, "__muldi3"); - // SPU has no sign or zero extended loads for i1, i8, i16: setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); - setTruncStoreAction(MVT::i8, MVT::i8, Custom); - setTruncStoreAction(MVT::i16, MVT::i8, Custom); - setTruncStoreAction(MVT::i32, MVT::i8, Custom); - setTruncStoreAction(MVT::i64, MVT::i8, Custom); - setTruncStoreAction(MVT::i128, MVT::i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); + setTruncStoreAction(MVT::i128, MVT::i64, Expand); + setTruncStoreAction(MVT::i128, MVT::i32, Expand); + setTruncStoreAction(MVT::i128, MVT::i16, Expand); + setTruncStoreAction(MVT::i128, MVT::i8, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // SPU constant load actions are custom lowered: - 
setOperationAction(ISD::Constant, MVT::i64, Custom); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); // SPU's loads and stores have to be custom lowered: - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; ++sctype) { - MVT VT = (MVT::SimpleValueType)sctype; + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, Custom); - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } } - // Custom lower BRCOND for i8 to "promote" the result to i16 - setOperationAction(ISD::BRCOND, MVT::Other, Custom); + for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; + ++sctype) { + MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } + } // Expand the jumptable branches setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -179,18 +201,42 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -#if 0 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); -#endif // SPU has no intrinsics for these particular operations: setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // SPU has no division/remainder instructions + setOperationAction(ISD::SREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i16, Expand); + setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::SDIVREM, MVT::i16, Expand); + setOperationAction(ISD::UDIVREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + 
setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i128, Expand); + setOperationAction(ISD::UREM, MVT::i128, Expand); + setOperationAction(ISD::SDIV, MVT::i128, Expand); + setOperationAction(ISD::UDIV, MVT::i128, Expand); + setOperationAction(ISD::SDIVREM, MVT::i128, Expand); + setOperationAction(ISD::UDIVREM, MVT::i128, Expand); // We don't support sin/cos/sqrt/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -200,7 +246,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); - // If we're enabling GP optimizations, use hardware square root + // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt + // for f32!) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); @@ -225,26 +272,40 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SRL, MVT::i8, Custom); setOperationAction(ISD::SRA, MVT::i8, Custom); - // SPU needs custom lowering for shift left/right for i64 - setOperationAction(ISD::SHL, MVT::i64, Custom); - setOperationAction(ISD::SRL, MVT::i64, Custom); - setOperationAction(ISD::SRA, MVT::i64, Custom); + // Make these operations legal and handle them during instruction selection: + setOperationAction(ISD::SHL, MVT::i64, Legal); + setOperationAction(ISD::SRL, MVT::i64, Legal); + setOperationAction(ISD::SRA, MVT::i64, Legal); // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i32, Custom); - setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall - - // SMUL_LOHI, UMUL_LOHI - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MUL, MVT::i64, Legal); + + // Expand double-width multiplication + // FIXME: It would probably be reasonable to support some of these operations + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); // Need to custom handle (some) common i8, i64 math ops - setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::ADD, MVT::i8, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Legal); // SPU does not have BSWAP. It does have i32 support CTLZ. // CTPOP has to be custom lowered. 
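
// [Editor's note] The sketch below is not part of this patch; it illustrates,
// under stated assumptions, the byte-count idea behind the custom CTPOP
// lowering named above. SPU's CNTB instruction counts the set bits in each
// byte of a register (SPUISD::CNTB in this file), so a full population count
// is CNTB followed by a horizontal sum of the per-byte counts. All names
// below are illustrative, not from this file.
#include <cstdint>

static unsigned popcount32_bytewise(uint32_t x) {
  unsigned total = 0;
  for (int i = 0; i < 4; ++i) {
    uint8_t byte = uint8_t(x >> (8 * i));   // isolate one byte lane
    unsigned cnt = 0;
    for (int b = 0; b < 8; ++b)             // what CNTB computes per byte
      cnt += (byte >> b) & 1;
    total += cnt;                           // horizontal sum across bytes
  }
  return total;
}
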
@@ -255,53 +316,62 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i16, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i128, Expand); + setOperationAction(ISD::CTTZ , MVT::i8, Expand); + setOperationAction(ISD::CTTZ , MVT::i16, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i64, Expand); + setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ , MVT::i8, Promote); + setOperationAction(ISD::CTLZ , MVT::i16, Promote); setOperationAction(ISD::CTLZ , MVT::i32, Legal); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); + setOperationAction(ISD::CTLZ , MVT::i128, Expand); // SPU has a version of select that implements (a&~c)|(b&c), just like // select ought to work: setOperationAction(ISD::SELECT, MVT::i8, Legal); setOperationAction(ISD::SELECT, MVT::i16, Legal); setOperationAction(ISD::SELECT, MVT::i32, Legal); - setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Legal); setOperationAction(ISD::SETCC, MVT::i8, Legal); setOperationAction(ISD::SETCC, MVT::i16, Legal); setOperationAction(ISD::SETCC, MVT::i32, Legal); - setOperationAction(ISD::SETCC, MVT::i64, Expand); - - // Zero extension and sign extension for i64 have to be - // custom legalized - setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom); - - // Custom lower truncates - setOperationAction(ISD::TRUNCATE, MVT::i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); - // SPU has a legal FP -> signed INT instruction - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + // Custom lower i32/i64 -> i128 sign extend + setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + // SPU has a legal FP -> signed INT instruction for f32, but for f64, need + // to expand to a libcall, hence the custom lowering: + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f32, Custom); - //setOperationAction(ISD::FDIV, MVT::f64, Custom); + setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall - // SPU has [U|S]INT_TO_FP - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i16, 
Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); @@ -313,24 +383,17 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - // Support label based line numbers. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; ++sctype) { - MVT VT = (MVT::SimpleValueType)sctype; + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - setOperationAction(ISD::GlobalAddress, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); } - // RET must be custom lowered, to meet ABI requirements - setOperationAction(ISD::RET, MVT::Other, Custom); - // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -362,29 +425,31 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + // "Odd size" vector classes that we're willing to support: + addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); // mul has to be custom lowered. 
- setOperationAction(ISD::MUL , VT, Custom); + setOperationAction(ISD::MUL, VT, Legal); - setOperationAction(ISD::AND , VT, Legal); - setOperationAction(ISD::OR , VT, Legal); - setOperationAction(ISD::XOR , VT, Legal); - setOperationAction(ISD::LOAD , VT, Legal); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::SELECT, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::FDIV, VT, Custom); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); // Custom lower build_vector, constant pool spills, insert and // extract vector elements: @@ -396,14 +461,15 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } - setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::AND, MVT::v16i8, Custom); setOperationAction(ISD::OR, MVT::v16i8, Custom); setOperationAction(ISD::XOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setShiftAmountType(MVT::i32); - setBooleanContents(ZeroOrOneBooleanContent); + setBooleanContents(ZeroOrNegativeOneBooleanContent); setStackPointerRegisterToSaveRestore(SPU::R1); @@ -415,8 +481,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) computeRegisterProperties(); - // Set other properties: - setSchedulingPreference(SchedulingForLatency); + // Set pre-RA register scheduler default to BURR, which produces slightly + // better code than the default (could also be TDRR, but TargetLowering.h + // needs a mod to support that model): + setSchedulingPreference(SchedulingForRegPressure); } const char * @@ -434,35 +502,20 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; + node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; - node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; - node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; - node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; - node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; - node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; - node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL"; - node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; - node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] = - "SPUISD::ROTQUAD_RZ_BYTES"; - node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = - 
"SPUISD::ROTQUAD_RZ_BITS"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = - "SPUISD::ROTBYTES_LEFT_BITS"; + "SPUISD::ROTBYTES_LEFT_BITS"; node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; - node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; - node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; - node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; - node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; - node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; - node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; - node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; + node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; + node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; + node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; } std::map::iterator i = node_names.find(Opcode); @@ -470,9 +523,20 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const return ((i != node_names.end()) ? i->second : 0); } -MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { - MVT VT = Op.getValueType(); - return (VT.isInteger() ? VT : MVT(MVT::i32)); +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { + return 3; +} + +//===----------------------------------------------------------------------===// +// Return the Cell SPU's SETCC result type +//===----------------------------------------------------------------------===// + +MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { + // i16 and i32 are valid SETCC result types + return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? + VT.getSimpleVT().SimpleTy : + MVT::i32); } //===----------------------------------------------------------------------===// @@ -485,106 +549,6 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { // LowerOperation implementation //===----------------------------------------------------------------------===// -/// Aligned load common code for CellSPU -/*! - \param[in] Op The SelectionDAG load or store operand - \param[in] DAG The selection DAG - \param[in] ST CellSPU subtarget information structure - \param[in,out] alignment Caller initializes this to the load or store node's - value from getAlignment(), may be updated while generating the aligned load - \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned - offset (divisible by 16, modulo 16 == 0) - \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the - offset of the preferred slot (modulo 16 != 0) - \param[in,out] VT Caller initializes this value type to the the load or store - node's loaded or stored value type; may be updated if an i1-extended load or - store. - \param[out] was16aligned true if the base pointer had 16-byte alignment, - otherwise false. Can help to determine if the chunk needs to be rotated. - - Both load and store lowering load a block of data aligned on a 16-byte - boundary. This is the common aligned load code shared between both. 
- */ -static SDValue -AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST, - LSBaseSDNode *LSN, - unsigned &alignment, int &alignOffs, int &prefSlotOffs, - MVT &VT, bool &was16aligned) -{ - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - const valtype_map_s *vtm = getValueTypeMapEntry(VT); - SDValue basePtr = LSN->getBasePtr(); - SDValue chain = LSN->getChain(); - - if (basePtr.getOpcode() == ISD::ADD) { - SDValue Op1 = basePtr.getNode()->getOperand(1); - - if (Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) { - const ConstantSDNode *CN = cast(basePtr.getOperand(1)); - - alignOffs = (int) CN->getZExtValue(); - prefSlotOffs = (int) (alignOffs & 0xf); - - // Adjust the rotation amount to ensure that the final result ends up in - // the preferred slot: - prefSlotOffs -= vtm->prefslot_byte; - basePtr = basePtr.getOperand(0); - - // Loading from memory, can we adjust alignment? - if (basePtr.getOpcode() == SPUISD::AFormAddr) { - SDValue APtr = basePtr.getOperand(0); - if (APtr.getOpcode() == ISD::TargetGlobalAddress) { - GlobalAddressSDNode *GSDN = cast(APtr); - alignment = GSDN->getGlobal()->getAlignment(); - } - } - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - } else if (basePtr.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast(basePtr); - alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize()); - prefSlotOffs = (int) (alignOffs & 0xf); - prefSlotOffs -= vtm->prefslot_byte; - basePtr = DAG.getRegister(SPU::R1, VT); - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - - if (alignment == 16) { - // Realign the base pointer as a D-Form address: - if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { - basePtr = DAG.getNode(ISD::ADD, PtrVT, - basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - } - - // Emit the vector load: - was16aligned = true; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); - } - - // Unaligned load or we're using the "large memory" model, which means that - // we have to be very pessimistic: - if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Add the offset - basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - was16aligned = false; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); -} - /// Custom lower loads for CellSPU /*! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements @@ -594,9 +558,9 @@ AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST, emitted, e.g. 
for MVT::f32 extending load to MVT::f64: \verbatim -%1 v16i8,ch = load +%1 v16i8,ch = load %2 v16i8,ch = rotate %1 -%3 v4f8, ch = bitconvert %2 +%3 v4f8, ch = bitconvert %2 %4 f32 = vec2perfslot %3 %5 f64 = fp_extend %4 \endverbatim @@ -605,61 +569,130 @@ static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast(Op); SDValue the_chain = LN->getChain(); - MVT InVT = LN->getMemoryVT(); - MVT OutVT = Op.getValueType(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT InVT = LN->getMemoryVT(); + EVT OutVT = Op.getValueType(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - SDValue Ops[8]; + const valtype_map_s *vtm = getValueTypeMapEntry(InVT); + DebugLoc dl = Op.getDebugLoc(); switch (LN->getAddressingMode()) { case ISD::UNINDEXED: { - int offset, rotamt; - bool was16aligned; - SDValue result = - AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT, - was16aligned); - - if (result.getNode() == 0) - return result; - - the_chain = result.getValue(1); - // Rotate the chunk if necessary - if (rotamt < 0) - rotamt += 16; - if (rotamt != 0 || !was16aligned) { - SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); - - Ops[0] = result; - if (was16aligned) { - Ops[1] = DAG.getConstant(rotamt, MVT::i16); + SDValue result; + SDValue basePtr = LN->getBasePtr(); + SDValue rotate; + + if (alignment == 16) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and the rotation amount: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast (basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte); + + if (rotamt < 0) + rotamt += 16; + + rotate = DAG.getConstant(rotamt, MVT::i16); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) + || (basePtr.getOpcode() == SPUISD::IndirectAddr + && basePtr.getOperand(0).getOpcode() == SPUISD::Hi + && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { + // Plain aligned a-form address: rotate into preferred slot + // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) + int64_t rotamt = -vtm->prefslot_byte; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getConstant(rotamt, MVT::i16); } else { - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - LoadSDNode *LN1 = cast(result); - Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(), + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + int64_t rotamt = -vtm->prefslot_byte; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, DAG.getConstant(rotamt, PtrVT)); } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa(Op1)) { + // Convert the (add , ) to an indirect address contained + // in a register. 
Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add , ) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2); + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(-vtm->prefslot_byte, PtrVT)); } + // Re-emit as a v16i8 vector load + result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), LN->isNonTemporal(), 16); + + // Update the chain + the_chain = result.getValue(1); + + // Rotate into the preferred slot: + result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8, + result.getValue(0), rotate); + // Convert the loaded v16i8 vector to the appropriate vector type // specified by the operand: - MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits())); - result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT, - DAG.getNode(ISD::BIT_CONVERT, vecVT, result)); + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + InVT, (128 / InVT.getSizeInBits())); + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, + DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result)); // Handle extending loads by extending the scalar result: if (ExtType == ISD::SEXTLOAD) { - result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result); + result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); } else if (ExtType == ISD::ZEXTLOAD) { - result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result); + result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); } else if (ExtType == ISD::EXTLOAD) { unsigned NewOpc = ISD::ANY_EXTEND; if (OutVT.isFloatingPoint()) - NewOpc = ISD::FP_EXTEND; + NewOpc = ISD::FP_EXTEND; - result = DAG.getNode(NewOpc, OutVT, result); + result = DAG.getNode(NewOpc, dl, OutVT, result); } SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); @@ -668,7 +701,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { the_chain }; - result = DAG.getNode(SPUISD::LDRESULT, retvts, + result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, retops, sizeof(retops) / sizeof(retops[0])); return result; } @@ -677,11 +710,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_INC: case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - cerr << (unsigned) LN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ + { + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)LN->getAddressingMode())); + /*NOTREACHED*/ + } } return SDValue(); @@ -697,30 +731,94 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { StoreSDNode *SN = cast(Op); SDValue Value = SN->getValue(); - MVT VT = Value.getValueType(); - MVT StVT = (!SN->isTruncatingStore() ? 
VT : SN->getMemoryVT());
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT VT = Value.getValueType();
+  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
   unsigned alignment = SN->getAlignment();

   switch (SN->getAddressingMode()) {
   case ISD::UNINDEXED: {
-    int chunk_offset, slot_offset;
-    bool was16aligned;
-
     // The vector type we really want to load from the 16-byte chunk.
-    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
-        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+                                 VT, (128 / VT.getSizeInBits()));
+
+    SDValue alignLoadVec;
+    SDValue basePtr = SN->getBasePtr();
+    SDValue the_chain = SN->getChain();
+    SDValue insertEltOffs;
+
+    if (alignment == 16) {
+      ConstantSDNode *CN;
+
+      // Special cases for a known aligned load to simplify the base pointer
+      // and insertion byte:
+      if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+        // Known offset into basePtr
+        int64_t offset = CN->getSExtValue();
+
+        // Simplify the base pointer for this case:
+        basePtr = basePtr.getOperand(0);
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant((offset & 0xf), PtrVT));
+
+        if ((offset & ~0xf) > 0) {
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                basePtr,
+                                DAG.getConstant((offset & ~0xf), PtrVT));
+        }
+      } else {
+        // Otherwise, assume it's at byte 0 of basePtr
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                                    basePtr,
+                                    DAG.getConstant(0, PtrVT));
+      }
+    } else {
+      // Unaligned load: must be more pessimistic about addressing modes:
+      if (basePtr.getOpcode() == ISD::ADD) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        MachineRegisterInfo &RegInfo = MF.getRegInfo();
+        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+        SDValue Flag;
+
+        SDValue Op0 = basePtr.getOperand(0);
+        SDValue Op1 = basePtr.getOperand(1);
+
+        if (isa<ConstantSDNode>(Op1)) {
+          // Convert the (add <ptr>, <const>) to an indirect address contained
+          // in a register. Note that this is done because we need to avoid
+          // creating a 0(reg) d-form address due to the SPU's block loads.
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+        } else {
+          // Convert the (add <ptr>, <ptr>) to an indirect address, which
+          // will likely be lowered as a reg(reg) x-form address.
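
// [Editor's note] A plain-C++ sketch, not part of this patch, of the quadword
// read-modify-write scheme that LowerLOAD/LowerSTORE implement: every access
// touches a full 16-byte line. A load rotates the line so the scalar lands in
// its preferred slot (ROTBYTES_LEFT); a store splices the new bytes into the
// line and writes the whole line back (SHUFB performs the splice). Names here
// are illustrative only.
#include <cstdint>
#include <cstring>

static void rotate_left_bytes(uint8_t line[16], unsigned amt) {
  uint8_t tmp[16];
  for (unsigned i = 0; i < 16; ++i)
    tmp[i] = line[(i + amt) % 16];          // byte at index `amt` moves to 0
  std::memcpy(line, tmp, 16);
}

static void splice_bytes(uint8_t line[16], const void *val,
                         unsigned size, unsigned offset) {
  std::memcpy(line + offset, val, size);    // assumes offset + size <= 16
}
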
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+        }
+      } else {
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+                              basePtr,
+                              DAG.getConstant(0, PtrVT));
+      }

-    SDValue alignLoadVec =
-      AlignedLoad(Op, DAG, ST, SN, alignment,
-                  chunk_offset, slot_offset, VT, was16aligned);
+      // Insertion point is solely determined by basePtr's contents
+      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+                                  basePtr,
+                                  DAG.getConstant(0, PtrVT));
+    }

-    if (alignLoadVec.getNode() == 0)
-      return alignLoadVec;
+    // Re-emit as a v16i8 vector load
+    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+                               SN->getSrcValue(), SN->getSrcValueOffset(),
+                               SN->isVolatile(), SN->isNonTemporal(), 16);
+
+    // Update the chain
+    the_chain = alignLoadVec.getValue(1);

     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
-    SDValue basePtr = LN->getBasePtr();
-    SDValue the_chain = alignLoadVec.getValue(1);
     SDValue theValue = SN->getValue();
     SDValue result;

@@ -732,48 +830,41 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
       theValue = theValue.getOperand(0);
     }

-    chunk_offset &= 0xf;
-
-    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
-    SDValue insertEltPtr;
-
     // If the base pointer is already a D-form address, then just create
     // a new D-form address with a slot offset and the original base pointer.
     // Otherwise generate a D-form address with the slot offset relative
     // to the stack pointer, which is always aligned.
-    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
-    DEBUG(basePtr.getNode()->dump(&DAG));
-    DEBUG(cerr << "\n");
-
-    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
-        (basePtr.getOpcode() == ISD::ADD
-         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
-      insertEltPtr = basePtr;
-    } else {
-      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
-    }
+#if !defined(NDEBUG)
+    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+      errs() << "CellSPU LowerSTORE: basePtr = ";
+      basePtr.getNode()->dump(&DAG);
+      errs() << "\n";
+    }
+#endif

     SDValue insertEltOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
+      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
     SDValue vectorizeOp =
-      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
+      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

-    result = DAG.getNode(SPUISD::SHUFB, vecVT,
-                         vectorizeOp, alignLoadVec,
-                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
+    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+                         vectorizeOp, alignLoadVec,
+                         DAG.getNode(ISD::BIT_CONVERT, dl,
+                                     MVT::v4i32, insertEltOp));

-    result = DAG.getStore(the_chain, result, basePtr,
+    result = DAG.getStore(the_chain, dl, result, basePtr,
                           LN->getSrcValue(), LN->getSrcValueOffset(),
-                          LN->isVolatile(), LN->getAlignment());
+                          LN->isVolatile(), LN->isNonTemporal(),
+                          LN->getAlignment());

-#if 0 && defined(NDEBUG)
+#if 0 && !defined(NDEBUG)
     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
       const SDValue &currentRoot = DAG.getRoot();

       DAG.setRoot(result);
-      cerr << "------- CellSPU:LowerStore result:\n";
+      errs() << "------- CellSPU:LowerStore result:\n";
       DAG.dump();
-      cerr << "-------\n";
+      errs() << "-------\n";
       DAG.setRoot(currentRoot);
     }
 #endif
@@ -786,112 +877,98 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   case ISD::POST_INC:
   case ISD::POST_DEC:
   case ISD::LAST_INDEXED_MODE:
-    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
-            "UNINDEXED\n";
-    cerr << (unsigned) SN->getAddressingMode() << "\n";
-    abort();
-    /*NOTREACHED*/
+    {
report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)SN->getAddressingMode())); + /*NOTREACHED*/ + } } return SDValue(); } -/// Generate the address of a constant pool entry. +//! Generate the address of a constant pool entry. static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { // Just return the SDValue with the constant pool address in it. - return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } - assert(0 && - "LowerConstantPool: Relocation model other than static" - " not supported."); + llvm_unreachable("LowerConstantPool: Relocation model other than static" + " not supported."); return SDValue(); } +//! Alternate entry point for generating the address of a constant pool entry +SDValue +SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { + return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); +} + static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } - assert(0 && - "LowerJumpTable: Relocation model other than static not supported."); + llvm_unreachable("LowerJumpTable: Relocation model other than static" + " not supported."); return SDValue(); } static SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if 
(!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - cerr << "LowerGlobalAddress: Relocation model other than static not " - << "supported.\n"; - abort(); - /*NOTREACHED*/ - } - - return SDValue(); -} - -//! Custom lower i64 integer constants -/*! - This code inserts all of the necessary juggling that needs to occur to load - a 64-bit constant into a register. - */ -static SDValue -LowerConstant(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - - if (VT == MVT::i64) { - ConstantSDNode *CN = cast(Op.getNode()); - SDValue T = DAG.getConstant(CN->getZExtValue(), VT); - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); - } else { - cerr << "LowerConstant: unhandled constant type " - << VT.getMVTString() - << "\n"; - abort(); + report_fatal_error("LowerGlobalAddress: Relocation model other than static" + "not supported."); /*NOTREACHED*/ } @@ -901,51 +978,37 @@ LowerConstant(SDValue Op, SelectionDAG &DAG) { //! Custom lower double precision floating point constants static SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (VT == MVT::f64) { ConstantFPSDNode *FP = cast(Op.getNode()); assert((FP != 0) && "LowerConstantFP: Node is not ConstantFPSDNode"); - + uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); SDValue T = DAG.getConstant(dbits, MVT::i64); - SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T); - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec)); + SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec)); } return SDValue(); } -//! 
Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16) -static SDValue -LowerBRCOND(SDValue Op, SelectionDAG &DAG) -{ - SDValue Cond = Op.getOperand(1); - MVT CondVT = Cond.getValueType(); - MVT CondNVT; - - if (CondVT == MVT::i8) { - CondNVT = MVT::i16; - return DAG.getNode(ISD::BRCOND, Op.getValueType(), - Op.getOperand(0), - DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)), - Op.getOperand(2)); - } else - return SDValue(); // Unchanged -} +SDValue +SPUTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { -static SDValue -LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) -{ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SmallVector ArgValues; - SDValue Root = Op.getOperand(0); - bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); @@ -954,25 +1017,21 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) unsigned ArgRegIdx = 0; unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Add DAG nodes to load the arguments or copy them out of registers. - for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1; - ArgNo != e; ++ArgNo) { - MVT ObjectVT = Op.getValue(ArgNo).getValueType(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; SDValue ArgVal; if (ArgRegIdx < NumArgRegs) { const TargetRegisterClass *ArgRegClass; - switch (ObjectVT.getSimpleVT()) { - default: { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << ObjectVT.getMVTString() - << "\n"; - abort(); - } + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("LowerFormalArguments Unhandled argument type: " + + Twine(ObjectVT.getEVTString())); case MVT::i8: ArgRegClass = &SPU::R8CRegClass; break; @@ -985,6 +1044,9 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) case MVT::i64: ArgRegClass = &SPU::R64CRegClass; break; + case MVT::i128: + ArgRegClass = &SPU::GPRCRegClass; + break; case MVT::f32: ArgRegClass = &SPU::R32FPRegClass; break; @@ -1003,21 +1065,21 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++ArgRegIdx; } else { // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; } - ArgValues.push_back(ArgVal); + InVals.push_back(ArgVal); // Update the chain - Root = ArgVal.getOperand(0); + Chain = ArgVal.getOperand(0); } // vararg handling: @@ -1029,25 
+1091,25 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); + VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, + true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0); - Root = Store.getOperand(0); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); + SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + false, false, 0); + Chain = Store.getOperand(0); MemOps.push_back(Store); // Increment address by stack slot size for the next stored argument ArgOffset += StackSlotSize; } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); } - ArgValues.push_back(Root); - - // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()); + return Chain; } /// isLSAAddress - Return the immediate to use if the specified @@ -1064,24 +1126,25 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); } -static SDValue -LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - CallSDNode *TheCall = cast(Op.getNode()); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); +SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + // CellSPU target does not yet support tail call optimization. + isTailCall = false; + + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); + unsigned NumOps = Outs.size(); unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); // Handy pointer type - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - // Accumulate how many bytes are to be pushed on the stack, including the - // linkage area, and parameter passing area. According to the SPU ABI, - // we minimally need space for [LR] and [SP] - unsigned NumStackBytes = SPUFrameInfo::minStackSize(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -1099,22 +1162,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = TheCall->getArg(i); + SDValue Arg = Outs[i].Val; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. 
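
// [Editor's note] Not part of this patch: the register-else-stack policy used
// both here and in LowerFormalArguments above, reduced to a sketch with
// illustrative names. The first NumArgRegs arguments travel in registers; the
// rest land in consecutive 16-byte stack slots (SPUFrameInfo::stackSlotSize()).
struct ArgLoc {
  bool inReg;
  unsigned regIdxOrOffset;
};

static ArgLoc assignNextArg(unsigned &argRegIdx, unsigned numArgRegs,
                            unsigned &stackOffset, unsigned slotSize = 16) {
  if (argRegIdx < numArgRegs)
    return ArgLoc{true, argRegIdx++};       // next free argument register
  ArgLoc loc = {false, stackOffset};        // spill to the next stack slot
  stackOffset += slotSize;
  return loc;
}
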
SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - switch (Arg.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected ValueType for argument!"); + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i8: + case MVT::i16: case MVT::i32: case MVT::i64: case MVT::i128: if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1123,10 +1189,13 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; + case MVT::v2i64: + case MVT::v2f64: case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: @@ -1134,21 +1203,26 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; } } - // Update number of stack bytes actually used, insert a call sequence start - NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize()); + // Accumulate how many bytes are to be pushed on the stack, including the + // linkage area, and parameter passing area. According to the SPU ABI, + // we minimally need space for [LR] and [SP]. + unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize(); + + // Insert a call sequence start Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, true)); if (!MemOpChains.empty()) { // Adjust the stack pointer for the stack arguments. - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); } @@ -1156,8 +1230,8 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -1169,7 +1243,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { GlobalValue *GV = G->getGlobal(); - MVT CalleeVT = Callee.getValueType(); + EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); @@ -1183,18 +1257,27 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // This may be an unsafe assumption for JIT and really large compilation // units. 
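
// [Editor's note] A standalone sketch, not part of this patch, of the callee
// address decision implemented just below (hypothetical names): in the
// small-memory model an external declaration is reached through an absolute
// A-form address and a locally defined function through a PC-relative
// address; the large-memory model always builds a Hi/Lo indirect address.
enum CallAddrKind { AFormAbsolute, PCRelative, HiLoIndirect };

static CallAddrKind classifyCallee(bool usingLargeMem, bool isDeclaration) {
  if (usingLargeMem)
    return HiLoIndirect;                    // lowered as an X-form reg(reg) pair
  return isDeclaration ? AFormAbsolute      // assume it fits in local store
                       : PCRelative;        // intra-module, PC-relative branch
}
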
if (GV->isDeclaration()) { - Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero); + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero); + Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); } } else { // "Large memory" mode: Turn all calls into indirect calls with a X-form // address pairs: - Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); } - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + EVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + + if (!ST->usingLargeMem()) { + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); + } else { + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); + } + } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { // If this is an absolute destination address that appears to be a legal // local store address, use the munged value. Callee = SDValue(Dest, 0); @@ -1212,77 +1295,78 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (InFlag.getNode()) Ops.push_back(InFlag); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag), + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), DAG.getIntPtrConstant(0, true), InFlag); - if (TheCall->getValueType(0) != MVT::Other) + if (!Ins.empty()) InFlag = Chain.getValue(1); - SDValue ResultVals[3]; - unsigned NumResults = 0; + // If the function returns void, just return the chain. + if (Ins.empty()) + return Chain; // If the call has results, copy the values out of the ret val registers. 
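A compact restatement of the callee-address policy visible in this hunk, for the global-address path: large-memory mode always uses an indirect X-form address pair, while small-memory mode uses an A-form absolute address for declarations and a PC-relative address for locally defined functions. The enum and function below are purely illustrative:

#include <cstdio>

enum AddrForm { AForm, PCRel, Indirect };

AddrForm chooseCalleeForm(bool LargeMem, bool IsDeclaration) {
  if (LargeMem)
    return Indirect;                  // X-form address pair
  return IsDeclaration ? AForm : PCRel;
}

int main() {
  printf("extern decl -> %d, local def -> %d, large mem -> %d\n",
         chooseCalleeForm(false, true),   // A-form
         chooseCalleeForm(false, false),  // PC-relative
         chooseCalleeForm(true, false));  // indirect
  return 0;
}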
- switch (TheCall->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected ret value!"); + switch (Ins[0].VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ret value!"); case MVT::Other: break; case MVT::i32: - if (TheCall->getValueType(1) == MVT::i32) { - Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, + if (Ins.size() > 1 && Ins[1].VT == MVT::i32) { + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, + MVT::i32, InFlag).getValue(1); + InVals.push_back(Chain.getValue(0)); + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, Chain.getValue(2)).getValue(1); - ResultVals[1] = Chain.getValue(0); - NumResults = 2; + InVals.push_back(Chain.getValue(0)); } else { - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, + InFlag).getValue(1); + InVals.push_back(Chain.getValue(0)); } break; case MVT::i64: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, + InFlag).getValue(1); + InVals.push_back(Chain.getValue(0)); + break; + case MVT::i128: + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, + InFlag).getValue(1); + InVals.push_back(Chain.getValue(0)); break; case MVT::f32: case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0), + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); break; case MVT::v2f64: + case MVT::v2i64: case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0), + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); break; } - // If the function returns void, just return the chain. - if (NumResults == 0) - return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. - ResultVals[NumResults++] = Chain; - SDValue Res = DAG.getMergeValues(ResultVals, NumResults); - return Res.getValue(Op.getResNo()); + return Chain; } -static SDValue -LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { +SDValue +SPUTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { + SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - CCState CCInfo(CC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_SPU); // If this is the first return lowered for this function, add the regs to the // liveout set for the function. @@ -1291,21 +1375,21 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - SDValue Chain = Op.getOperand(0); SDValue Flag; // Copy the result values into the output registers. 
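Reading the copy-out switch above: every scalar and vector result comes back in R3, except the two-i32 case, where the code reads the first value from R4 and the second from R3. A toy restatement of just that register choice, with no DAG machinery:

#include <cstdio>

void printReturnRegs(unsigned NumI32Results) {
  if (NumI32Results > 1)
    printf("InVals[0] <- R4, InVals[1] <- R3\n"); // the two-i32 special case
  else
    printf("InVals[0] <- R3\n");                  // the common single-value case
}

int main() {
  printReturnRegs(1);
  printReturnRegs(2);
  return 0;
}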
for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Outs[i].Val, Flag); Flag = Chain.getValue(1); } if (Flag.getNode()) - return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag); + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); else - return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain); + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); } @@ -1332,14 +1416,14 @@ getVecImm(SDNode *N) { } } - return 0; // All UNDEF: use implicit def.; not Constant node + return 0; } /// get_vec_i18imm - Test if this vector is a vector filled with the same value /// and the value fits into an unsigned 18-bit constant, and if so, return the /// constant SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { uint64_t Value = CN->getZExtValue(); if (ValueType == MVT::i64) { @@ -1361,7 +1445,7 @@ SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, /// and the value fits into a signed 16-bit constant, and if so, return the /// constant SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { int64_t Value = CN->getSExtValue(); if (ValueType == MVT::i64) { @@ -1384,7 +1468,7 @@ SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, /// and the value fits into a signed 10-bit constant, and if so, return the /// constant SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { int64_t Value = CN->getSExtValue(); if (ValueType == MVT::i64) { @@ -1395,7 +1479,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, return SDValue(); Value = Value >> 32; } - if (isS10Constant(Value)) + if (isInt<10>(Value)) return DAG.getTargetConstant(Value, ValueType); } @@ -1410,7 +1494,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, /// constant vectors. Thus, we test to see if the upper and lower bytes are the /// same value. SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { int Value = (int) CN->getZExtValue(); if (ValueType == MVT::i16 @@ -1429,7 +1513,7 @@ SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, /// and the value fits into a signed 16-bit constant, and if so, return the /// constant SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { uint64_t Value = CN->getZExtValue(); if ((ValueType == MVT::i32 @@ -1459,269 +1543,178 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// If this is a vector of constants or undefs, get the bits. A bit in -// UndefBits is set if the corresponding element of the vector is an -// ISD::UNDEF value. For undefs, the corresponding VectorBits values are -// zero. Return true if this is not an array of constants, false if it is. -// -static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], - uint64_t UndefBits[2]) { - // Start with zero'd results. 
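The get_vec_i10imm helper above accepts a v2i64 splat only when both 32-bit halves agree and the value fits a signed 10-bit field (the diff swaps the old isS10Constant for LLVM's isInt<10>). A self-contained sketch of that test:

#include <cstdint>
#include <cstdio>

bool isSignedNBit(int64_t V, unsigned N) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}

bool fitsI10Splat64(uint64_t Value) {
  if (uint32_t(Value) != uint32_t(Value >> 32))
    return false;                     // halves disagree: not a 32-bit splat
  return isSignedNBit(int32_t(uint32_t(Value >> 32)), 10);
}

int main() {
  printf("%d\n", fitsI10Splat64(0xFFFFFFFFFFFFFFFFull)); // -1: fits, prints 1
  printf("%d\n", fitsI10Splat64(0x0000020000000200ull)); // 512: needs 11 bits
  printf("%d\n", fitsI10Splat64(0x00000001000001FFull)); // halves differ
  return 0;
}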
- VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; - - unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue OpVal = BV->getOperand(i); - - unsigned PartNo = i >= e/2; // In the upper 128 bits? - unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. - - uint64_t EltBits = 0; - if (OpVal.getOpcode() == ISD::UNDEF) { - uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); - UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); - continue; - } else if (ConstantSDNode *CN = dyn_cast(OpVal)) { - EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize)); - } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { - const APFloat &apf = CN->getValueAPF(); - EltBits = (CN->getValueType(0) == MVT::f32 - ? FloatToBits(apf.convertToFloat()) - : DoubleToBits(apf.convertToDouble())); - } else { - // Nonconstant element. - return true; - } +//! Lower a BUILD_VECTOR instruction creatively: +static SDValue +LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); + unsigned minSplatBits = EltVT.getSizeInBits(); - VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); - } + if (minSplatBits < 16) + minSplatBits = 16; - //printf("%llx %llx %llx %llx\n", - // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); - return false; -} + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; -/// If this is a splat (repetition) of a value across the whole vector, return -/// the smallest size that splats it. For example, "0x01010101010101..." is a -/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and -/// SplatSize = 1 byte. -static bool isConstantSplat(const uint64_t Bits128[2], - const uint64_t Undef128[2], - int MinSplatBits, - uint64_t &SplatBits, uint64_t &SplatUndef, - int &SplatSize) { - // Don't let undefs prevent splats from matching. See if the top 64-bits are - // the same as the lower 64-bits, ignoring undefs. - uint64_t Bits64 = Bits128[0] | Bits128[1]; - uint64_t Undef64 = Undef128[0] & Undef128[1]; - uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); - uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); - uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); - uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); - - if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { - if (MinSplatBits < 64) { - - // Check that the top 32-bits are the same as the lower 32-bits, ignoring - // undefs. - if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { - if (MinSplatBits < 32) { - - // If the top 16-bits are different than the lower 16-bits, ignoring - // undefs, we have an i32 splat. - if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { - if (MinSplatBits < 16) { - // If the top 8-bits are different than the lower 8-bits, ignoring - // undefs, we have an i16 splat. - if ((Bits16 & (uint16_t(~Undef16) >> 8)) - == ((Bits16 >> 8) & ~Undef16)) { - // Otherwise, we have an 8-bit splat. 
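The removed isConstantSplat below works by repeated halving: a pattern is a splat of half its width whenever its two halves agree (modulo undefs). The replacement delegates the same question to BuildVectorSDNode::isConstantSplat. A standalone sketch of the halving idea, starting from 64 bits and ignoring undef tracking:

#include <cstdint>
#include <cstdio>

unsigned splatSizeBytes(uint64_t Bits) {
  unsigned Size = 8;
  while (Size > 1) {
    unsigned HalfBits = Size * 8 / 2;            // at most 32 here
    uint64_t Mask = (1ull << HalfBits) - 1;
    if (((Bits >> HalfBits) & Mask) != (Bits & Mask))
      break;                          // halves differ: the splat stops here
    Bits &= Mask;
    Size /= 2;
  }
  return Size;                        // smallest repeating unit, in bytes
}

int main() {
  printf("%u\n", splatSizeBytes(0x0101010101010101ull)); // 1
  printf("%u\n", splatSizeBytes(0x0001000100010001ull)); // 2
  printf("%u\n", splatSizeBytes(0x1234567812345678ull)); // 4
  printf("%u\n", splatSizeBytes(0x0123456789abcdefull)); // 8
  return 0;
}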
- SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); - SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); - SplatSize = 1; - return true; - } - } else { - SplatBits = Bits16; - SplatUndef = Undef16; - SplatSize = 2; - return true; - } - } - } else { - SplatBits = Bits32; - SplatUndef = Undef32; - SplatSize = 4; - return true; - } - } - } else { - SplatBits = Bits128[0]; - SplatUndef = Undef128[0]; - SplatSize = 8; - return true; - } - } + if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + || minSplatBits < SplatBitSize) + return SDValue(); // Wasn't a constant vector or splat exceeded min - return false; // Can't be a splat if two pieces don't match. -} + uint64_t SplatBits = APSplatBits.getZExtValue(); -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. If we CAN select this case, and if it -// selects to a single instruction, return Op. Otherwise, if we can codegen -// this case more efficiently than a constant pool load, lower it to the -// sequence of ops that should be used. -static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - // If this is a vector of constants or undefs, get the bits. A bit in - // UndefBits is set if the corresponding element of the vector is an - // ISD::UNDEF value. For undefs, the corresponding VectorBits values are - // zero. - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits) - || !isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) - return SDValue(); // Not a constant vector, not a splat. - - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: + report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + + Twine(VT.getEVTString())); + /*NOTREACHED*/ case MVT::v4f32: { - uint32_t Value32 = SplatBits; - assert(SplatSize == 4 + uint32_t Value32 = uint32_t(SplatBits); + assert(SplatBitSize == 32 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(Value32, MVT::i32); - return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); break; } case MVT::v2f64: { - uint64_t f64val = SplatBits; - assert(SplatSize == 8 + uint64_t f64val = uint64_t(SplatBits); + assert(SplatBitSize == 64 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants SDValue T = DAG.getConstant(f64val, MVT::i64); - return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); break; } case MVT::v16i8: { // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits | (SplatBits << 8); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) - Ops[i] = DAG.getConstant(Value16, MVT::i16); - return DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8)); + unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; + SmallVector Ops; + + Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); } case MVT::v8i16: { - unsigned short Value16; - if (SplatSize == 2) - Value16 = (unsigned short) (SplatBits & 0xffff); - else - Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); - SDValue T = DAG.getConstant(Value16, VT.getVectorElementType()); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) Ops[i] = T; - return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8); + unsigned short Value16 = SplatBits; + SDValue T = DAG.getConstant(Value16, EltVT); + SmallVector Ops; + + Ops.assign(8, T); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); } case MVT::v4i32: { - unsigned int Value = SplatBits; - SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); + } + case MVT::v2i32: { + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); } case MVT::v2i64: { - uint64_t val = SplatBits; - uint32_t upper = uint32_t(val >> 32); - uint32_t lower = uint32_t(val); - - if (upper == lower) { - // Magic constant that can be matched by IL, ILA, et. al. - SDValue Val = DAG.getTargetConstant(val, MVT::i64); - return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val); - } else { - SDValue LO32; - SDValue HI32; - SmallVector ShufBytes; - SDValue Result; - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
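One detail worth calling out in the v16i8 case above: because minSplatBits is clamped to 16, isConstantSplat already reports the byte splat widened to a 16-bit pattern, which is presumably why the explicit "| (SplatBits << 8)" is left commented out. A two-line illustration of the widened pattern:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t  ByteSplat = 0xAB;
  uint16_t Halfword  = uint16_t(ByteSplat) | uint16_t(ByteSplat << 8);
  // Eight copies of this halfword rebuild the original v16i8 splat:
  printf("0x%04x\n", Halfword); // prints 0xabab
  return 0;
}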
- - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000); - lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000); - - // Create lower vector if not a special pattern - if (!lower_special) { - SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } + return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); + } + } - // Create upper vector if not a special pattern - if (!upper_special) { - SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } + return SDValue(); +} - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... both upper and lower are special, so punt with - // a target constant: - SDValue Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, - Zero, Zero); - } +/*! + */ +SDValue +SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, + DebugLoc dl) { + uint32_t upper = uint32_t(SplatVal >> 32); + uint32_t lower = uint32_t(SplatVal); + + if (upper == lower) { + // Magic constant that can be matched by IL, ILA, et. al. + SDValue Val = DAG.getTargetConstant(upper, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Val, Val, Val, Val)); + } else { + bool upper_special, lower_special; - for (int i = 0; i < 4; ++i) { - uint64_t val = 0; - for (int j = 0; j < 4; ++j) { - SDValue V; - bool process_upper, process_lower; - val <<= 8; - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val |= 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val |= 0xc0; - else if ((process_upper && upper == 0x80000000) - || (process_lower && lower == 0x80000000)) - val |= (j == 0 ? 0xe0 : 0x80); - } else - val |= i * 4 + j + ((i & 1) * 16); - } + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
+ + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Both upper and lower are special, lower to a constant pool load: + if (lower_special && upper_special) { + SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + SplatValCN, SplatValCN); + } - ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); + SDValue LO32; + SDValue HI32; + SmallVector ShufBytes; + SDValue Result; + + // Create lower vector if not a special pattern + if (!lower_special) { + SDValue LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } + + // Create upper vector if not a special pattern + if (!upper_special) { + SDValue HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } + + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + + for (int i = 0; i < 4; ++i) { + uint64_t val = 0; + for (int j = 0; j < 4; ++j) { + SDValue V; + bool process_upper, process_lower; + val <<= 8; + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val |= 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val |= 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val |= (j == 0 ? 0xe0 : 0x80); + } else + val |= i * 4 + j + ((i & 1) * 16); } - return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); + ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); } - } - } - return SDValue(); + return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size())); + } } /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on @@ -1738,47 +1731,75 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { /// \note /// SPUISD::SHUFB is eventually selected as Cell's shufb instructions. static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); if (V2.getOpcode() == ISD::UNDEF) V2 = V1; // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have // to be monotonically increasing with one exception element. 
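The control-byte loop above encodes shufb semantics: a control byte of the form 0x80 yields 0x00, 0xc0 yields 0xff, 0xe0 yields 0x80, and anything else selects a byte of the concatenated inputs. This standalone re-computation of the four ShufBytes words, shown here for an upper half equal to the special value 0, follows the loop line for line:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t upper = 0, lower = 0xdeadbeef;
  bool upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
  bool lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

  for (int i = 0; i < 4; ++i) {
    uint32_t val = 0;
    for (int j = 0; j < 4; ++j) {
      val <<= 8;
      bool process_upper = upper_special && (i & 1) == 0;
      bool process_lower = lower_special && (i & 1) == 1;
      if (process_upper || process_lower) {
        if ((process_upper && upper == 0) || (process_lower && lower == 0))
          val |= 0x80;                     // emit a zero byte
        else if ((process_upper && upper == 0xffffffff) ||
                 (process_lower && lower == 0xffffffff))
          val |= 0xc0;                     // emit an 0xff byte
        else
          val |= (j == 0 ? 0xe0 : 0x80);   // sign byte 0x80, then zeros
      } else {
        val |= i * 4 + j + ((i & 1) * 16); // pass a source byte through
      }
    }
    printf("word %d: 0x%08x\n", i, val);   // 0x80808080 for the zeroed slots
  }
  return 0;
}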
- MVT EltVT = V1.getValueType().getVectorElementType(); + EVT VecVT = V1.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; unsigned V2Elt = 0; unsigned V2EltIdx0 = 0; unsigned CurrElt = 0; + unsigned MaxElts = VecVT.getVectorNumElements(); + unsigned PrevElt = 0; + unsigned V0Elt = 0; bool monotonic = true; - if (EltVT == MVT::i8) + bool rotate = true; + + if (EltVT == MVT::i8) { V2EltIdx0 = 16; - else if (EltVT == MVT::i16) + } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; - else if (EltVT == MVT::i32) + } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; - else - assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); + } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { + V2EltIdx0 = 2; + } else + llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); + + for (unsigned i = 0; i != MaxElts; ++i) { + if (SVN->getMaskElt(i) < 0) + continue; + + unsigned SrcElt = SVN->getMaskElt(i); - for (unsigned i = 0, e = PermMask.getNumOperands(); - EltsFromV2 <= 1 && monotonic && i != e; - ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + if (1 >= (++EltsFromV2)) { + V2Elt = (V2EltIdx0 - SrcElt) << 2; + } + } else if (CurrElt != SrcElt) { + monotonic = false; + } - if (SrcElt >= V2EltIdx0) { - ++EltsFromV2; - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } else if (CurrElt != SrcElt) { - monotonic = false; + ++CurrElt; } - ++CurrElt; + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { + PrevElt = SrcElt; + if (SrcElt == 0) + V0Elt = i; + } else { + rotate = false; + } + } else if (PrevElt == 0) { + // First time through, need to keep track of previous element + PrevElt = SrcElt; + } else { + // This isn't a rotation, takes elements from vector 2 + rotate = false; + } + } } if (EltsFromV2 == 1 && monotonic) { @@ -1786,44 +1807,45 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Initialize temporary register to 0 SDValue InitTempReg = - DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT)); + DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); // Copy register's contents as index in SHUFFLE_MASK: SDValue ShufMaskOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32, + DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32, DAG.getTargetConstant(V2Elt, MVT::i32), - DAG.getCopyFromReg(InitTempReg, VReg, PtrVT)); + DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT)); // Use shuffle mask in SHUFB synthetic instruction: - return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, + ShufMaskOp); + } else if (rotate) { + int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; + + return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), + V1, DAG.getConstant(rotamt, MVT::i16)); } else { // Convert the SHUFFLE_VECTOR mask's input element units to the // actual bytes. 
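LowerVECTOR_SHUFFLE now recognizes two cheap patterns before falling back to a full byte mask: a monotonic mask with exactly one element taken from V2 (lowered through SHUFFLE_MASK for the C*[DX] insertion forms), and a pure rotation of V1 (lowered to ROTBYTES_LEFT). A simplified standalone classifier, ignoring undef mask entries and the diff's first-element bookkeeping:

#include <cstdio>

int main() {
  const unsigned MaxElts = 4;
  unsigned Mask[MaxElts] = {1, 2, 3, 0};  // a rotate-by-one pattern
  // Entries >= MaxElts would address V2, as with V2EltIdx0 in the diff.

  bool Monotonic = true, Rotate = true;
  unsigned EltsFromV2 = 0, CurrElt = 0, PrevElt = 0;

  for (unsigned i = 0; i != MaxElts; ++i) {
    unsigned SrcElt = Mask[i];
    if (SrcElt >= MaxElts)
      ++EltsFromV2;                       // element pulled from V2
    else if (CurrElt != SrcElt)
      Monotonic = false;
    ++CurrElt;

    if (i > 0 && !(SrcElt == PrevElt + 1 ||
                   (PrevElt == MaxElts - 1 && SrcElt == 0)))
      Rotate = false;                     // not consecutive modulo MaxElts
    PrevElt = SrcElt;
  }

  printf("single-insert: %d, rotation: %d\n",
         Monotonic && EltsFromV2 == 1, Rotate);
  return 0;
}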
unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); - - for (unsigned j = 0; j < BytesPerElement; ++j) { - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i8)); - } + for (unsigned i = 0, e = MaxElts; i != e; ++i) { + unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); + + for (unsigned j = 0; j < BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &ResultMask[0], ResultMask.size()); - return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); } } static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); // Op0 = the scalar + DebugLoc dl = Op.getDebugLoc(); if (Op0.getNode()->getOpcode() == ISD::Constant) { // For a constant, build the appropriate constant vector, which will @@ -1831,13 +1853,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *CN = cast(Op0.getNode()); SmallVector ConstVecValues; - MVT VT; + EVT VT; size_t n_copies; // Create a constant vector: - switch (Op.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected constant value type in " - "LowerSCALAR_TO_VECTOR"); + switch (Op.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; @@ -1850,214 +1872,30 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { for (size_t j = 0; j < n_copies; ++j) ConstVecValues.push_back(CValue); - return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(), + return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), &ConstVecValues[0], ConstVecValues.size()); } else { // Otherwise, copy the value from one register to another: - switch (Op0.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: case MVT::f32: case MVT::f64: - return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0); + return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); } } return SDValue(); } -static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { - switch (Op.getValueType().getSimpleVT()) { - default: - cerr << "CellSPU: Unknown vector multiplication, got " - << Op.getValueType().getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - - case MVT::v4i32: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB); - SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA); - SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB); - SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1); - - return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, 
HiProd2); - break; - } - - // Multiply two v8i16 vectors (pipeline friendly version): - // a) multiply lower halves, mask off upper 16-bit of 32-bit product - // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) - // c) Use SELB to select upper and lower halves from the intermediate results - // - // NOTE: We really want to move the SELECT_MASK to earlier to actually get the - // dual-issue. This code does manage to do this, even if it's a little on - // the wacky side - case MVT::v8i16: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SDValue Chain = Op.getOperand(0); - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - - SDValue FSMBOp = - DAG.getCopyToReg(Chain, FSMBIreg, - DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, - DAG.getConstant(0xcccc, MVT::i16))); - - SDValue HHProd = - DAG.getCopyToReg(FSMBOp, HiProdReg, - DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); - - SDValue HHProd_v4i32 = - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); - - return DAG.getNode(SPUISD::SELB, MVT::v8i16, - DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), - DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), - DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, - HHProd_v4i32, - DAG.getConstant(16, MVT::i16))), - DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); - } - - // This M00sE is N@stI! (apologies to Monty Python) - // - // SPU doesn't know how to do any 8-bit multiplication, so the solution - // is to break it all apart, sign extend, and reassemble the various - // intermediate products. 
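The deleted v4i32 path above rests on an arithmetic identity: with mpyh(a,b) = ((a >> 16) * (b & 0xffff)) << 16 and mpyu(a,b) the product of the two low halfwords, mpyh(a,b) + mpyh(b,a) + mpyu(a,b) equals a*b modulo 2^32, because the high-times-high partial product contributes only above bit 31. A quick standalone check of the identity:

#include <cstdint>
#include <cstdio>

uint32_t mpyh(uint32_t a, uint32_t b) { return ((a >> 16) * (b & 0xffff)) << 16; }
uint32_t mpyu(uint32_t a, uint32_t b) { return (a & 0xffff) * (b & 0xffff); }

int main() {
  uint32_t a = 0x12345678, b = 0x9abcdef0;
  uint32_t Lowered = mpyh(a, b) + mpyh(b, a) + mpyu(a, b);
  printf("%s\n", Lowered == a * b ? "match" : "mismatch");
  return 0;
}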
- case MVT::v16i8: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - SDValue c8 = DAG.getConstant(8, MVT::i32); - SDValue c16 = DAG.getConstant(16, MVT::i32); - - SDValue LLProd = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); - - SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); - - SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); - - SDValue LHProd = - DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, - DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); - - SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, - DAG.getConstant(0x2222, MVT::i16)); - - SDValue LoProdParts = - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getNode(SPUISD::SELB, MVT::v8i16, - LLProd, LHProd, FSMBmask)); - - SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32); - - SDValue LoProd = - DAG.getNode(ISD::AND, MVT::v4i32, - LoProdParts, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LoProdMask, LoProdMask, - LoProdMask, LoProdMask)); - - SDValue rAH = - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); - - SDValue rBH = - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); - - SDValue HLProd = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); - - SDValue HHProd_1 = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, - DAG.getNode(SPUISD::VEC_SRA, - MVT::v4i32, rAH, c8)), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, - DAG.getNode(SPUISD::VEC_SRA, - MVT::v4i32, rBH, c8))); - - SDValue HHProd = - DAG.getNode(SPUISD::SELB, MVT::v8i16, - HLProd, - DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), - FSMBmask); - - SDValue HiProd = - DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16); - - return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, - DAG.getNode(ISD::OR, MVT::v4i32, - LoProd, HiProd)); - } - } - - return SDValue(); -} - -static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - SDValue A = Op.getOperand(0); - SDValue B = Op.getOperand(1); - MVT VT = Op.getValueType(); - - unsigned VRegBR, VRegC; - - if (VT == MVT::f32) { - VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); - VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); - } else { - VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - } - // TODO: make sure we're feeding FPInterp the right arguments - // Right now: fi B, frest(B) - - // Computes BRcpl = - // (Floating Interpolate (FP Reciprocal Estimate B)) - SDValue BRcpl = - DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, - DAG.getNode(SPUISD::FPInterp, VT, B, - DAG.getNode(SPUISD::FPRecipEst, VT, B))); - - // Computes A * BRcpl and stores in a temporary register - SDValue AxBRcpl = - DAG.getCopyToReg(BRcpl, VRegC, - DAG.getNode(ISD::FMUL, VT, A, - DAG.getCopyFromReg(BRcpl, VRegBR, VT))); - // What's the Chain variable do? It's magic! 
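The deleted LowerFDIVf32 follows the classic estimate-and-refine recipe: take a hardware reciprocal estimate of B (FPRecipEst), improve it with FPInterp, then compute q = A*r + r*(A - B*(A*r)), where the second term cancels the residual error of the estimate. A numerical sketch with a deliberately perturbed software reciprocal standing in for the estimate instructions:

#include <cstdio>

int main() {
  float A = 355.0f, B = 113.0f;
  float r = (1.0f / B) * 1.00001f;     // pretend this is only an estimate
  float q0 = A * r;                    // first quotient guess
  float q  = q0 + r * (A - B * q0);    // the diff's correction term
  printf("refined q = %.7f, reference = %.7f\n", q, A / B);
  return 0;
}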
- // TODO: set Chain = Op(0).getEntryNode() - - return DAG.getNode(ISD::FADD, VT, - DAG.getCopyFromReg(AxBRcpl, VRegC, VT), - DAG.getNode(ISD::FMUL, VT, - DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), - DAG.getNode(ISD::FSUB, VT, A, - DAG.getNode(ISD::FMUL, VT, B, - DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); -} - static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue N = Op.getOperand(0); SDValue Elt = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); SDValue retval; if (ConstantSDNode *C = dyn_cast(Elt)) { @@ -2066,24 +1904,24 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // sanity checks: if (VT == MVT::i8 && EltNo >= 16) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); else if (VT == MVT::i16 && EltNo >= 8) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); else if (VT == MVT::i32 && EltNo >= 4) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); else if (VT == MVT::i64 && EltNo >= 2) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { // i32 and i64: Element 0 is the preferred slot - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N); + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); } // Need to generate shuffle mask and extract: int prefslot_begin = -1, prefslot_end = -1; int elt_byte = EltNo * VT.getSizeInBits() / 8; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Invalid value type!"); case MVT::i8: { @@ -2109,7 +1947,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { assert(prefslot_begin != -1 && prefslot_end != -1 && "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); - unsigned int ShufBytes[16]; + unsigned int ShufBytes[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; for (int i = 0; i < 16; ++i) { // zero fill uppper part of preferred slot, don't care about the // other slots: @@ -2127,7 +1967,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue ShufMask[4]; for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { - unsigned bidx = i / 4; + unsigned bidx = i * 4; unsigned int bits = ((ShufBytes[bidx] << 24) | (ShufBytes[bidx+1] << 16) | (ShufBytes[bidx+2] << 8) | @@ -2135,25 +1975,25 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { ShufMask[i] = DAG.getConstant(bits, MVT::i32); } - SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufMask[0], - sizeof(ShufMask) / sizeof(ShufMask[0])); + SDValue ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(SPUISD::SHUFB, N.getValueType(), + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), N, N, ShufMaskVec)); } else { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector - MVT VecVT = N.getValueType(); + EVT VecVT = N.getValueType(); if (!VecVT.isSimple() || !VecVT.isVector() || 
!VecVT.is128BitVector()) { - cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n"; - abort(); + report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + "vector type!"); } // Make life easier by making sure the index is zero-extended to i32 if (Elt.getValueType() != MVT::i32) - Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt); + Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); // Scale the index to a bit/byte shift quantity APInt scaleFactor = @@ -2163,52 +2003,52 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { if (scaleShift > 0) { // Scale the shift factor: - Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt, + Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, DAG.getConstant(scaleShift, MVT::i32)); } - vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt); + vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt); // Replicate the bytes starting at byte 0 across the entire vector (for // consistency with the notion of a unified register set) SDValue replicate; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: - cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n"; - abort(); + report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + "type"); /*NOTREACHED*/ case MVT::i8: { SDValue factor = DAG.getConstant(0x00000000, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i16: { SDValue factor = DAG.getConstant(0x00010001, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i32: case MVT::f32: { SDValue factor = DAG.getConstant(0x00010203, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i64: case MVT::f64: { SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor, - loFactor, hiFactor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + loFactor, hiFactor, loFactor, hiFactor); break; } } - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(SPUISD::SHUFB, VecVT, + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, VecVT, vecShift, vecShift, replicate)); } @@ -2219,325 +2059,126 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue VecOp = Op.getOperand(0); SDValue ValOp = Op.getOperand(1); SDValue IdxOp = Op.getOperand(2); - MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); ConstantSDNode *CN = cast(IdxOp); assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: - SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), DAG.getConstant(CN->getSExtValue(), PtrVT)); - SDValue ShufMask = 
DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); SDValue result = - DAG.getNode(SPUISD::SHUFB, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), - VecOp, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask)); + DAG.getNode(SPUISD::SHUFB, dl, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), + VecOp, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); return result; } -static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) +static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, + const TargetLowering &TLI) { SDValue N0 = Op.getOperand(0); // Everything has at least one operand + DebugLoc dl = Op.getDebugLoc(); + EVT ShiftVT = TLI.getShiftAmountTy(); assert(Op.getValueType() == MVT::i8); switch (Opc) { default: - assert(0 && "Unhandled i8 math operator"); + llvm_unreachable("Unhandled i8 math operator"); /*NOTREACHED*/ break; + case ISD::ADD: { + // 8-bit addition: Promote the arguments up to 16-bits and truncate + // the result: + SDValue N1 = Op.getOperand(1); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + + } + case ISD::SUB: { // 8-bit subtraction: Promote the arguments up to 16-bits and truncate // the result: SDValue N1 = Op.getOperand(1); - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } case ISD::ROTR: case ISD::ROTL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i32) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i32, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i32)); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) + ? ISD::ZERO_EXTEND + : ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + // Replicate lower 8-bits into upper 8: SDValue ExpandArg = - DAG.getNode(ISD::OR, MVT::i16, N0, - DAG.getNode(ISD::SHL, MVT::i16, + DAG.getNode(ISD::OR, dl, MVT::i16, N0, + DAG.getNode(ISD::SHL, dl, MVT::i16, N0, DAG.getConstant(8, MVT::i32))); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, ExpandArg, N1)); + + // Truncate back down to i8 + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); } case ISD::SRL: case ISD::SHL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) - ? 
ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - } - case ISD::SRA: { - SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) - ? ISD::SIGN_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - } - case ISD::MUL: { - SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - break; - } - } + EVT N1VT = N1.getValueType(); - return SDValue(); -} + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::ZERO_EXTEND; -static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) -{ - MVT VT = Op.getValueType(); - MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); - - SDValue Op0 = Op.getOperand(0); + if (N1.getValueType().bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; - switch (Opc) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ANY_EXTEND: { - MVT Op0VT = Op0.getValueType(); - MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); - - assert(Op0VT == MVT::i32 - && "CellSPU: Zero/sign extending something other than i32"); - - DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n"); - - SDValue PromoteScalar = - DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0); - - if (Opc != ISD::SIGN_EXTEND) { - // Use a shuffle to zero extend the i32 to i64 directly: - SDValue shufMask = - DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x00010203, MVT::i32), - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x08090a0b, MVT::i32)); - SDValue zextShuffle = - DAG.getNode(SPUISD::SHUFB, Op0VecVT, - PromoteScalar, PromoteScalar, shufMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle)); - } else { - // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift - // right and propagate the sign bit) instruction. 
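The i8 rotate lowering above relies on a small trick: duplicating the byte into both halves of an i16 makes a 16-bit rotate produce the 8-bit rotate in the low byte, so only a truncation is needed afterwards. A standalone check of that equivalence for shift amounts 1 through 7:

#include <cstdint>
#include <cstdio>

uint8_t rotl8_via_i16(uint8_t N, unsigned Amt) {
  uint16_t Expanded = uint16_t(N) | uint16_t(N << 8);       // byte doubled
  uint16_t Rotated  = uint16_t((Expanded << Amt) | (Expanded >> (16 - Amt)));
  return uint8_t(Rotated);                                  // truncate to i8
}

int main() {
  uint8_t N = 0xB4;
  for (unsigned Amt = 1; Amt < 8; ++Amt)
    printf("rotl8(0x%02x, %u) = 0x%02x\n", N, Amt, rotl8_via_i16(N, Amt));
  return 0;
}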
- SDValue RotQuad = - DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT, - PromoteScalar, DAG.getConstant(4, MVT::i32)); - SDValue SignQuad = - DAG.getNode(SPUISD::VEC_SRA, Op0VecVT, - PromoteScalar, DAG.getConstant(32, MVT::i32)); - SDValue SelMask = - DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT, - DAG.getConstant(0xf0f0, MVT::i16)); - SDValue CombineQuad = - DAG.getNode(SPUISD::SELB, Op0VecVT, - SignQuad, RotQuad, SelMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad)); + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); } - } - case ISD::ADD: { - // Turn operands into vectors to satisfy type checking (shufb works on - // vectors) - SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); - SDValue Op1 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1)); - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - - SDValue CarryGen = - DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedCarry = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - CarryGen, CarryGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedCarry)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } + case ISD::SRA: { + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); - case ISD::SUB: { - // Turn operands into vectors to satisfy type checking (shufb works on - // vectors) - SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); - SDValue Op1 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1)); - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. 
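The removed i64 ADD expansion above computes the sum in 32-bit register slots: CARRY_GENERATE produces the low slot's carry, the shuffle rotates that carry up one slot, and ADD_EXTENDED folds it into the high half. The same dataflow in plain scalar code:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t A = 0x00000001ffffffffull, B = 0x0000000000000001ull;

  uint32_t ALo = uint32_t(A), AHi = uint32_t(A >> 32);
  uint32_t BLo = uint32_t(B), BHi = uint32_t(B >> 32);

  uint32_t Lo    = ALo + BLo;
  uint32_t Carry = Lo < ALo;          // carry out of the low slot
  uint32_t Hi    = AHi + BHi + Carry; // the "extended" add in the high slot

  uint64_t Sum = (uint64_t(Hi) << 32) | Lo;
  printf("%s\n", Sum == A + B ? "match" : "mismatch");
  return 0;
}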
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - SDValue BorrowGen = - DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedBorrow = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - BorrowGen, BorrowGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::SIGN_EXTEND; - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedBorrow)); - } + if (N1VT.bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } - case ISD::SHL: { - SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); - SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0); - SDValue MaskLower = - DAG.getNode(SPUISD::SELB, VecVT, - Op0Vec, - DAG.getConstant(0, VecVT), - DAG.getNode(SPUISD::SELECT_MASK, VecVT, - DAG.getConstant(0xff00ULL, MVT::i16))); - SDValue ShiftAmtBytes = - DAG.getNode(ISD::SRL, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(3, ShiftAmtVT)); - SDValue ShiftAmtBits = - DAG.getNode(ISD::AND, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(7, ShiftAmtVT)); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, - DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, - MaskLower, ShiftAmtBytes), - ShiftAmtBits)); - } - - case ISD::SRL: { - MVT VT = Op.getValueType(); - SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); - SDValue ShiftAmtBytes = - DAG.getNode(ISD::SRL, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(3, ShiftAmtVT)); - SDValue ShiftAmtBits = - DAG.getNode(ISD::AND, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(7, ShiftAmtVT)); - - return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT, - DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT, - Op0, ShiftAmtBytes), - ShiftAmtBits); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } + case ISD::MUL: { + SDValue N1 = Op.getOperand(1); - case ISD::SRA: { - // Promote Op0 to vector - SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); - SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftVT = ShiftAmt.getValueType(); - - // Negate variable shift amounts - if (!isa(ShiftAmt)) { - ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT, - DAG.getConstant(0, ShiftVT), ShiftAmt); - } - - SDValue UpperHalfSign = - DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, - Op0, DAG.getConstant(31, MVT::i32)))); - SDValue UpperHalfSignMask = - DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign); - SDValue UpperLowerMask = - DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, - DAG.getConstant(0xff00, MVT::i16)); - SDValue UpperLowerSelect = - DAG.getNode(SPUISD::SELB, MVT::v2i64, - UpperHalfSignMask, Op0, UpperLowerMask); - SDValue RotateLeftBytes = - DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64, - UpperLowerSelect, ShiftAmt); - SDValue RotateLeftBits = - DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, - RotateLeftBytes, ShiftAmt); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - RotateLeftBits); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return 
DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + break; } } @@ -2549,7 +2190,8 @@ static SDValue LowerByteImmed(SDValue Op, SelectionDAG &DAG) { SDValue ConstVec; SDValue Arg; - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); ConstVec = Op.getOperand(0); Arg = Op.getOperand(1); @@ -2566,58 +2208,32 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { } if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - - if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits) - && isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) { - SDValue tcVec[16]; + BuildVectorSDNode *BCN = dyn_cast(ConstVec.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); + + if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + && minSplatBits <= SplatBitSize) { + uint64_t SplatBits = APSplatBits.getZExtValue(); SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]); - // Turn the BUILD_VECTOR into a set of target constants: - for (size_t i = 0; i < tcVecSize; ++i) - tcVec[i] = tc; - - return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg, - DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize)); + SmallVector tcVec; + tcVec.assign(16, tc); + return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); } } + // These operations (AND, OR, XOR) are legal, they just couldn't be custom // lowered. Return the operation, rather than a null SDValue. return Op; } -//! Lower i32 multiplication -static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT, - unsigned Opc) { - switch (VT.getSimpleVT()) { - default: - cerr << "CellSPU: Unknown LowerMUL value type, got " - << Op.getValueType().getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - - case MVT::i32: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - - return DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB), - DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)), - DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB)); - } - } - - return SDValue(); -} - //! Custom lowering for CTPOP (count population) /*! Custom lowering code that counts the number ones in the input @@ -2625,20 +2241,22 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT, ones per byte, which then have to be accumulated. 
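As the comment says, CNTB counts ones per byte, leaving per-byte totals that must be folded together. The i16 and i32 cases below do the folding with SRL/ADD and a final mask; here is the same arithmetic in standalone form:

#include <cstdint>
#include <cstdio>

uint32_t cntbWord(uint32_t V) {        // per-byte popcount, like CNTB
  uint32_t R = 0;
  for (int B = 0; B < 4; ++B) {
    uint32_t Byte = (V >> (8 * B)) & 0xff, C = 0;
    while (Byte) { C += Byte & 1; Byte >>= 1; }
    R |= C << (8 * B);
  }
  return R;
}

int main() {
  uint32_t V    = 0xdeadbeef;
  uint32_t Cnt  = cntbWord(V);
  uint32_t Sum1 = Cnt + (Cnt >> 16);   // fold the upper halfword's counts down
  uint32_t Sum2 = Sum1 + (Sum1 >> 8);  // fold the remaining byte count down
  printf("%u\n", Sum2 & 0xff);         // 0xdeadbeef has 24 set bits
  return 0;
}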
-//! Lower i32 multiplication
-static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
-                        unsigned Opc) {
-  switch (VT.getSimpleVT()) {
-  default:
-    cerr << "CellSPU: Unknown LowerMUL value type, got "
-         << Op.getValueType().getMVTString()
-         << "\n";
-    abort();
-    /*NOTREACHED*/
-
-  case MVT::i32: {
-    SDValue rA = Op.getOperand(0);
-    SDValue rB = Op.getOperand(1);
-
-    return DAG.getNode(ISD::ADD, MVT::i32,
-                       DAG.getNode(ISD::ADD, MVT::i32,
-                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
-                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
-                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
-  }
-  }
-
-  return SDValue();
-}
-
 //! Custom lowering for CTPOP (count population)
 /*!
   Custom lowering code that counts the number ones in the input
@@ -2625,20 +2241,22 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
   ones per byte, which then have to be accumulated.
 */
 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
-  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  EVT VT = Op.getValueType();
+  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+                               VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
 
-  switch (VT.getSimpleVT()) {
+  switch (VT.getSimpleVT().SimpleTy) {
   default:
     assert(false && "Invalid value type!");
   case MVT::i8: {
     SDValue N = Op.getOperand(0);
     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
 
-    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
-    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
 
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
   }
 
   case MVT::i16: {
@@ -2652,22 +2270,22 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
 
-    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
-    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
 
     // CNTB_result becomes the chain to which all of the virtual registers
     // CNTB_reg, SUM1_reg become associated:
     SDValue CNTB_result =
-      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
 
     SDValue CNTB_rescopy =
-      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
 
-    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
+    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
 
-    return DAG.getNode(ISD::AND, MVT::i16,
-                       DAG.getNode(ISD::ADD, MVT::i16,
-                                   DAG.getNode(ISD::SRL, MVT::i16,
+    return DAG.getNode(ISD::AND, dl, MVT::i16,
+                       DAG.getNode(ISD::ADD, dl, MVT::i16,
+                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                                Tmp1, Shift1),
                                    Tmp1),
                        Mask0);
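
LowerCTPOP leans on the SPU's CNTB instruction, which produces a population count per byte; the per-byte counts are then folded together with shift/add rounds. The same arithmetic for the i32 case, in portable C++ (a sketch of the idea, not the emitted code):

#include <cassert>
#include <cstdint>

// Per-byte population count (what the SPU CNTB instruction produces).
static uint32_t cntb(uint32_t x) {
  uint32_t r = 0;
  for (int byte = 0; byte < 4; ++byte) {
    uint32_t b = (x >> (8 * byte)) & 0xff;
    uint32_t c = 0;
    for (; b; b &= b - 1) ++c;    // Kernighan count within one byte
    r |= c << (8 * byte);
  }
  return r;
}

// Fold the four byte counts into one value, mirroring the i32 case:
// two shift/add rounds, then mask off everything but the final count.
static uint32_t ctpop32(uint32_t x) {
  uint32_t counts = cntb(x);
  uint32_t sum1 = counts + (counts >> 16); // add the upper halfword's counts
  uint32_t sum2 = sum1 + (sum1 >> 8);      // add the remaining byte count
  return sum2 & 0xff;                      // total fits in the low byte
}

int main() {
  assert(ctpop32(0xffffffffu) == 32);
  assert(ctpop32(0x80000001u) == 2);
  return 0;
}
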
@@ -2686,37 +2304,38 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
 
-    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
-    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
 
     // CNTB_result becomes the chain to which all of the virtual registers
     // CNTB_reg, SUM1_reg become associated:
     SDValue CNTB_result =
-      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
 
     SDValue CNTB_rescopy =
-      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
 
     SDValue Comp1 =
-      DAG.getNode(ISD::SRL, MVT::i32,
-                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+                  Shift1);
 
     SDValue Sum1 =
-      DAG.getNode(ISD::ADD, MVT::i32,
-                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
 
     SDValue Sum1_rescopy =
-      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
+      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
 
     SDValue Comp2 =
-      DAG.getNode(ISD::SRL, MVT::i32,
-                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                   Shift2);
+
     SDValue Sum2 =
-      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
-                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
 
-    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
+    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
   }
 
   case MVT::i64:
@@ -2726,6 +2345,182 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }
 
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  EVT OpVT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  EVT Op0VT = Op0.getValueType();
+
+  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+      || OpVT == MVT::i64) {
+    // Convert f32 / f64 to i32 / i64 via libcall.
+    RTLIB::Libcall LC =
+            (Op.getOpcode() == ISD::FP_TO_SINT)
+             ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+             : RTLIB::getFPTOUINT(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+    SDValue Dummy;
+    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+  }
+
+  return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  EVT OpVT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  EVT Op0VT = Op0.getValueType();
+
+  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+      || Op0VT == MVT::i64) {
+    // Convert i32, i64 to f64 via libcall:
+    RTLIB::Libcall LC =
+            (Op.getOpcode() == ISD::SINT_TO_FP)
+             ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+             : RTLIB::getUINTTOFP(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+    SDValue Dummy;
+    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+  }
+
+  return Op;
+}
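
Both conversion helpers share one routing rule: what the hardware can convert directly passes through, and everything involving f64 or i64 becomes a runtime library call. A minimal sketch of that decision (the enum and function name are invented for illustration):

#include <cassert>

enum class Ty { f32, f64, i32, i64 };

// Mirrors the predicate in LowerFP_TO_INT: f32->i32 stays in hardware,
// while f64->i32 and every conversion to i64 become libcalls.
static bool fpToIntNeedsLibcall(Ty from, Ty to) {
  return (to == Ty::i32 && from == Ty::f64) || to == Ty::i64;
}

int main() {
  assert(!fpToIntNeedsLibcall(Ty::f32, Ty::i32)); // native conversion
  assert(fpToIntNeedsLibcall(Ty::f64, Ty::i32));  // expanded to a libcall
  assert(fpToIntNeedsLibcall(Ty::f32, Ty::i64));  // expanded to a libcall
  return 0;
}
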
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+  DebugLoc dl = Op.getDebugLoc();
+  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  EVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+  EVT IntVT(MVT::i64);
+
+  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+  // selected to a NOP:
+  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+  SDValue lhsHi32 =
+          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                      DAG.getNode(ISD::SRL, dl, IntVT,
+                                  i64lhs, DAG.getConstant(32, MVT::i32)));
+  SDValue lhsHi32abs =
+          DAG.getNode(ISD::AND, dl, MVT::i32,
+                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+  SDValue lhsLo32 =
+          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+  // SETO and SETUO only use the lhs operand:
+  if (CC->get() == ISD::SETO) {
+    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+    // SETUO
+    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+    return DAG.getNode(ISD::XOR, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhs, DAG.getConstantFP(0.0, lhsVT),
+                                    ISD::SETUO),
+                       DAG.getConstant(ccResultAllOnes, ccResultVT));
+  } else if (CC->get() == ISD::SETUO) {
+    // Evaluates to true if Op0 is [SQ]NaN
+    return DAG.getNode(ISD::AND, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhsHi32abs,
+                                    DAG.getConstant(0x7ff00000, MVT::i32),
+                                    ISD::SETGE),
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhsLo32,
+                                    DAG.getConstant(0, MVT::i32),
+                                    ISD::SETGT));
+  }
+
+  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+  SDValue rhsHi32 =
+          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                      DAG.getNode(ISD::SRL, dl, IntVT,
+                                  i64rhs, DAG.getConstant(32, MVT::i32)));
+
+  // If a value is negative, subtract from the sign magnitude constant:
+  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+  // Convert the sign-magnitude representation into 2's complement:
+  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      lhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+  SDValue lhsSelect =
+          DAG.getNode(ISD::SELECT, dl, IntVT,
+                      lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      rhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+  SDValue rhsSelect =
+          DAG.getNode(ISD::SELECT, dl, IntVT,
+                      rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+  unsigned compareOp;
+
+  switch (CC->get()) {
+  case ISD::SETOEQ:
+  case ISD::SETUEQ:
+    compareOp = ISD::SETEQ; break;
+  case ISD::SETOGT:
+  case ISD::SETUGT:
+    compareOp = ISD::SETGT; break;
+  case ISD::SETOGE:
+  case ISD::SETUGE:
+    compareOp = ISD::SETGE; break;
+  case ISD::SETOLT:
+  case ISD::SETULT:
+    compareOp = ISD::SETLT; break;
+  case ISD::SETOLE:
+  case ISD::SETULE:
+    compareOp = ISD::SETLE; break;
+  case ISD::SETUNE:
+  case ISD::SETONE:
+    compareOp = ISD::SETNE; break;
+  default:
+    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
+  }
+
+  SDValue result =
+          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+                       (ISD::CondCode) compareOp);
+
+  if ((CC->get() & 0x8) == 0) {
+    // Ordered comparison:
+    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
+                                  ISD::SETO);
+    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
+                                  ISD::SETO);
+    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+  }
+
+  return result;
+}
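
For the remaining predicates, LowerSETCC compares the doubles as integers after remapping IEEE sign-magnitude bit patterns into two's complement, and ANDs in a NaN guard for ordered predicates. The same trick in plain C++ (a sketch assuming IEEE-754 doubles and two's complement int64_t, not the emitted DAG):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Reinterpret a double as its IEEE-754 bit pattern.
static int64_t bitsOf(double d) {
  int64_t i;
  std::memcpy(&i, &d, sizeof(i));
  return i;
}

// Doubles are sign-magnitude: negative values order backwards when their
// bits are compared as integers. The lowering fixes that by selecting, for
// negative inputs, 0x8000000000000000 - bits instead -- same trick here.
static int64_t toTwosComplement(double d) {
  int64_t i = bitsOf(d);
  if (i < 0) // sign bit set: remap sign-magnitude into two's complement
    i = static_cast<int64_t>(0x8000000000000000ULL) - i;
  return i;
}

// Ordered less-than, built from an integer compare plus the NaN guard the
// SETCC lowering adds for ordered predicates.
static bool f64_setolt(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return false; // ordered comparisons are false if either side is NaN
  return toTwosComplement(a) < toTwosComplement(b);
}

int main() {
  assert(f64_setolt(-2.0, -1.0));
  assert(f64_setolt(-0.5, 0.25));
  assert(!f64_setolt(1.0, NAN));
  return 0;
}
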
+
 //! Lower ISD::SELECT_CC
 /*!
   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@@ -2738,116 +2533,118 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   assumption, given the simplistic uses so far.
 */
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
+  EVT VT = Op.getValueType();
   SDValue lhs = Op.getOperand(0);
   SDValue rhs = Op.getOperand(1);
   SDValue trueval = Op.getOperand(2);
   SDValue falseval = Op.getOperand(3);
   SDValue condition = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // NOTE: SELB's arguments: $rA, $rB, $mask
+  //
+  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+  // where bits in $mask are 1. CCond will be inverted, having 1s where the
+  // condition was true and 0s where the condition was false. Hence, the
+  // arguments to SELB get reversed.
 
   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
   // with another "cannot select select_cc" assert:
-  SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
-  return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
+  SDValue compare = DAG.getNode(ISD::SETCC, dl,
+                                TLI.getSetCCResultType(Op.getValueType()),
+                                lhs, rhs, condition);
+  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
 }
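
The SELB note above is easiest to see as plain bit arithmetic: SELB takes bits from its first operand where the mask is 0 and from its second where the mask is 1, and a true comparison materializes an all-ones mask. A small model of that (a hypothetical selb helper, not the SPU intrinsic):

#include <cassert>
#include <cstdint>

// Model of the SPU SELB instruction: bits from a where mask is 0,
// bits from b where mask is 1.
static uint32_t selb(uint32_t a, uint32_t b, uint32_t mask) {
  return (a & ~mask) | (b & mask);
}

int main() {
  // A true comparison yields an all-ones mask, so to get "trueval when the
  // condition holds" the true value must sit in the b (mask == 1) slot --
  // hence LowerSELECT_CC emits SELB(falseval, trueval, compare).
  uint32_t trueval = 0x11111111, falseval = 0x22222222;
  assert(selb(falseval, trueval, 0xffffffffu) == trueval);  // cond true
  assert(selb(falseval, trueval, 0x00000000u) == falseval); // cond false
  return 0;
}
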
 
 //! Custom lower ISD::TRUNCATE
 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
 {
-  MVT VT = Op.getValueType();
-  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
-  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
-
+  // Type to truncate to
+  EVT VT = Op.getValueType();
+  MVT simpleVT = VT.getSimpleVT();
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+                               VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to truncate from
   SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
-  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+  EVT Op0VT = Op0.getValueType();
 
-  SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+    // Create shuffle mask, least significant doubleword of quadword
+    unsigned maskHigh = 0x08090a0b;
+    unsigned maskLow  = 0x0c0d0e0f;
+    // Use a shuffle to perform the truncation
+    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                   DAG.getConstant(maskHigh, MVT::i32),
+                                   DAG.getConstant(maskLow, MVT::i32),
+                                   DAG.getConstant(maskHigh, MVT::i32),
+                                   DAG.getConstant(maskLow, MVT::i32));
 
-  unsigned maskLow;
-  unsigned maskHigh;
+    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+                                       Op0, Op0, shufMask);
 
-  // Create shuffle mask
-  switch (Op0VT.getSimpleVT()) {
-  case MVT::i128:
-    switch (simpleVT) {
-    case MVT::i64:
-      // least significant doubleword of quadword
-      maskHigh = 0x08090a0b;
-      maskLow = 0x0c0d0e0f;
-      break;
-    case MVT::i32:
-      // least significant word of quadword
-      maskHigh = maskLow = 0x0c0d0e0f;
-      break;
-    case MVT::i16:
-      // least significant halfword of quadword
-      maskHigh = maskLow = 0x0e0f0e0f;
-      break;
-    case MVT::i8:
-      // least significant byte of quadword
-      maskHigh = maskLow = 0x0f0f0f0f;
-      break;
-    default:
-      cerr << "Truncation to illegal type!";
-      abort();
-    }
-    break;
-  case MVT::i64:
-    switch (simpleVT) {
-    case MVT::i32:
-      // least significant word of doubleword
-      maskHigh = maskLow = 0x04050607;
-      break;
-    case MVT::i16:
-      // least significant halfword of doubleword
-      maskHigh = maskLow = 0x06070607;
-      break;
-    case MVT::i8:
-      // least significant byte of doubleword
-      maskHigh = maskLow = 0x07070707;
-      break;
-    default:
-      cerr << "Truncation to illegal type!";
-      abort();
-    }
-    break;
-  case MVT::i32:
-  case MVT::i16:
-    switch (simpleVT) {
-    case MVT::i16:
-      // least significant halfword of word
-      maskHigh = maskLow = 0x02030203;
-      break;
-    case MVT::i8:
-      // least significant byte of word/halfword
-      maskHigh = maskLow = 0x03030303;
-      break;
-    default:
-      cerr << "Truncation to illegal type!";
-      abort();
-    }
-    break;
-  default:
-    cerr << "Trying to lower truncation from illegal type!";
-    abort();
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
   }
 
-  // Use a shuffle to perform the truncation
-  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                                 DAG.getConstant(maskHigh, MVT::i32),
-                                 DAG.getConstant(maskLow, MVT::i32),
-                                 DAG.getConstant(maskHigh, MVT::i32),
-                                 DAG.getConstant(maskLow, MVT::i32));
+  return SDValue();             // Leave the truncate unmolested
+}
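
The i128 -> i64 truncation above is nothing but a byte shuffle: in the SPU's big-endian register layout the least significant doubleword lives in bytes 8..15, which is exactly what the 0x08090a0b / 0x0c0d0e0f mask words select. The same selection on a plain 16-byte array (illustrative only):

#include <array>
#include <cassert>
#include <cstdint>

// Shuffle-based truncation: gather the bytes named by the mask from a
// 16-byte (big-endian) quadword, the way SPUISD::SHUFB does.
static std::array<uint8_t, 8>
truncQuadToDouble(const std::array<uint8_t, 16> &quad) {
  // Mask words 0x08090a0b and 0x0c0d0e0f name source bytes 8..15.
  const uint8_t mask[8] = {0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
  std::array<uint8_t, 8> out;
  for (int i = 0; i < 8; ++i)
    out[i] = quad[mask[i]];     // copy the least significant doubleword
  return out;
}

int main() {
  std::array<uint8_t, 16> q;
  for (int i = 0; i < 16; ++i) q[i] = static_cast<uint8_t>(i * 0x11);
  std::array<uint8_t, 8> lo = truncQuadToDouble(q);
  assert(lo[0] == q[8] && lo[7] == q[15]);
  return 0;
}
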
+
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to extend to
+  MVT OpVT = Op.getValueType().getSimpleVT();
 
-  SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
-                                     PromoteScalar, PromoteScalar, shufMask);
+  // Type to extend from
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+  // The type to extend to needs to be a i128 and
+  // the type to extend from needs to be i64 or i32.
+  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+         "LowerSIGN_EXTEND: input and/or output operand have wrong size");
 
-  return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                     DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
+  // Create shuffle mask
+  unsigned mask1 = 0x10101010; // byte  0 - 3 and 4 - 7
+  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
+  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask2, MVT::i32),
+                                 DAG.getConstant(mask3, MVT::i32));
+
+  // Word wise arithmetic right shift to generate at least one byte
+  // that contains sign bits.
+  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+  SDValue sraVal = DAG.getNode(ISD::SRA,
+                               dl,
+                               mvt,
+                               DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt,
+                                           Op0, Op0),
+                               DAG.getConstant(31, MVT::i32));
+
+  // Shuffle bytes - Copy the sign bits into the upper 64 bits
+  // and the input value into the lower 64 bits.
+  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+                                   DAG.getNode(ISD::ANY_EXTEND, dl,
+                                               MVT::i128, Op0),
+                                   sraVal, shufMask);
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
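
On two 64-bit halves, the rotmai-plus-shufb sequence that LowerSIGN_EXTEND emits reduces to: replicate the sign bit to fill the upper quadword, keep the input in the lower quadword. A compact model of that result (the I128 struct is invented for illustration; assumes an arithmetic right shift on signed values):

#include <cassert>
#include <cstdint>

struct I128 { uint64_t hi, lo; }; // big-endian halves of a quadword

// i64 -> i128 sign extension as the lowered sequence computes it: the
// arithmetic shift replicates the sign bit, the shuffle places it in the
// upper 64 bits and the input in the lower 64 bits.
static I128 sext_i64_to_i128(int64_t v) {
  I128 r;
  r.hi = static_cast<uint64_t>(v >> 63); // all-ones for negative, else zero
  r.lo = static_cast<uint64_t>(v);
  return r;
}

int main() {
  assert(sext_i64_to_i128(-1).hi == ~0ULL);
  assert(sext_i64_to_i128(-1).lo == ~0ULL);
  assert(sext_i64_to_i128(42).hi == 0 && sext_i64_to_i128(42).lo == 42);
  return 0;
}
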
 
 //! Custom (target-specific) lowering entry point
 /*!
@@ -2859,15 +2656,17 @@ SDValue
 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 {
   unsigned Opc = (unsigned) Op.getOpcode();
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
 
   switch (Opc) {
   default: {
-    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
-    cerr << "Op.getOpcode() = " << Opc << "\n";
-    cerr << "*Op.getNode():\n";
+#ifndef NDEBUG
+    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+    errs() << "Op.getOpcode() = " << Opc << "\n";
+    errs() << "*Op.getNode():\n";
     Op.getNode()->dump();
-    abort();
+#endif
+    llvm_unreachable(0);
   }
   case ISD::LOAD:
   case ISD::EXTLOAD:
@@ -2882,24 +2681,10 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
   case ISD::JumpTable:
     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
-  case ISD::Constant:
-    return LowerConstant(Op, DAG);
   case ISD::ConstantFP:
     return LowerConstantFP(Op, DAG);
-  case ISD::BRCOND:
-    return LowerBRCOND(Op, DAG);
-  case ISD::FORMAL_ARGUMENTS:
-    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
-  case ISD::CALL:
-    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
-  case ISD::RET:
-    return LowerRET(Op, DAG, getTargetMachine());
 
   // i8, i64 math ops:
-  case ISD::ZERO_EXTEND:
-  case ISD::SIGN_EXTEND:
-  case ISD::ANY_EXTEND:
   case ISD::ADD:
   case ISD::SUB:
   case ISD::ROTR:
@@ -2908,12 +2693,18 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   case ISD::SHL:
   case ISD::SRA: {
     if (VT == MVT::i8)
-      return LowerI8Math(Op, DAG, Opc);
-    else if (VT == MVT::i64)
-      return LowerI64Math(Op, DAG, Opc);
+      return LowerI8Math(Op, DAG, Opc, *this);
     break;
   }
 
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    return LowerFP_TO_INT(Op, DAG, *this);
+
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return LowerINT_TO_FP(Op, DAG, *this);
+
   // Vector-related lowering.
   case ISD::BUILD_VECTOR:
     return LowerBUILD_VECTOR(Op, DAG);
@@ -2934,32 +2725,23 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 
   // Vector and i8 multiply:
   case ISD::MUL:
-    if (VT.isVector())
-      return LowerVectorMUL(Op, DAG);
-    else if (VT == MVT::i8)
-      return LowerI8Math(Op, DAG, Opc);
-    else
-      return LowerMUL(Op, DAG, VT, Opc);
-
-  case ISD::FDIV:
-    if (VT == MVT::f32 || VT == MVT::v4f32)
-      return LowerFDIVf32(Op, DAG);
-#if 0
-    // This is probably a libcall
-    else if (Op.getValueType() == MVT::f64)
-      return LowerFDIVf64(Op, DAG);
-#endif
-    else
-      assert(0 && "Calling FDIV on unsupported MVT");
+    if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc, *this);
 
   case ISD::CTPOP:
     return LowerCTPOP(Op, DAG);
 
   case ISD::SELECT_CC:
-    return LowerSELECT_CC(Op, DAG);
+    return LowerSELECT_CC(Op, DAG, *this);
+
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG, *this);
 
   case ISD::TRUNCATE:
     return LowerTRUNCATE(Op, DAG);
+
+  case ISD::SIGN_EXTEND:
+    return LowerSIGN_EXTEND(Op, DAG);
   }
 
   return SDValue();
@@ -2971,13 +2753,13 @@ void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
 {
 #if 0
   unsigned Opc = (unsigned) N->getOpcode();
-  MVT OpVT = N->getValueType(0);
+  EVT OpVT = N->getValueType(0);
 
   switch (Opc) {
   default: {
-    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
-    cerr << "Op.getOpcode() = " << Opc << "\n";
-    cerr << "*Op.getNode():\n";
+    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+    errs() << "Op.getOpcode() = " << Opc << "\n";
+    errs() << "*Op.getNode():\n";
     N->dump();
     abort();
     /*NOTREACHED*/
@@ -3001,58 +2783,63 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
   SelectionDAG &DAG = DCI.DAG;
   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
-  MVT NodeVT = N->getValueType(0);      // The node's value type
-  MVT Op0VT = Op0.getValueType();       // The first operand's result
+  EVT NodeVT = N->getValueType(0);      // The node's value type
+  EVT Op0VT = Op0.getValueType();       // The first operand's result
   SDValue Result;                       // Initially, empty result
+  DebugLoc dl = N->getDebugLoc();
 
   switch (N->getOpcode()) {
   default: break;
   case ISD::ADD: {
     SDValue Op1 = N->getOperand(1);
 
-    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
-      SDValue Op01 = Op0.getOperand(1);
-      if (Op01.getOpcode() == ISD::Constant
-          || Op01.getOpcode() == ISD::TargetConstant) {
-        // (add <const>, (SPUindirect <arg>, <const>)) ->
-        // (SPUindirect <arg>, <const + const>)
-        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
-        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
-        SDValue combinedConst =
-          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
-
-#if defined(NDEBUG)
-        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-          cerr << "\n"
-               << "Replace: (add " << CN0->getZExtValue() << ", "
-               << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
+    if (Op0.getOpcode() == SPUISD::IndirectAddr
+        || Op1.getOpcode() == SPUISD::IndirectAddr) {
+      // Normalize the operands to reduce repeated code
+      SDValue IndirectArg = Op0, AddArg = Op1;
+
+      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+        IndirectArg = Op1;
+        AddArg = Op0;
+      }
+
+      if (isa<ConstantSDNode>(AddArg)) {
+        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+        SDValue IndOp1 = IndirectArg.getOperand(1);
+
+        if (CN0->isNullValue()) {
+          // (add (SPUindirect <arg>, <const>), 0) ->
+          // (SPUindirect <arg>, <const>)
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            errs() << "\n"
+                   << "Replace: (add (SPUindirect <arg>, <const>), 0)\n"
+                   << "With:    (SPUindirect <arg>, <const>)\n";
+          }
+#endif
+
+          return IndirectArg;
+        } else if (isa<ConstantSDNode>(IndOp1)) {
+          // (add (SPUindirect <arg>, <const>), <const>) ->
+          // (SPUindirect <arg>, <const + const>)
+          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            errs() << "\n"
+                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
-               << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
-        }
+                 << combinedConst << ")\n";
+          }
 #endif
 
-        return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
-                           Op0.getOperand(0), combinedConst);
-      }
-    } else if (isa<ConstantSDNode>(Op0)
-               && Op1.getOpcode() == SPUISD::IndirectAddr) {
-      SDValue Op11 = Op1.getOperand(1);
-      if (Op11.getOpcode() == ISD::Constant
-          || Op11.getOpcode() == ISD::TargetConstant) {
-        // (add (SPUindirect <arg>, <const>), <const>) ->
-        // (SPUindirect <arg>, <const + const>)
-        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
-        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
-        SDValue combinedConst =
-          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
-
-        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
-                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
-        DEBUG(cerr << "With:    (SPUindirect <arg>, "
-                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-
-        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
-                           Op1.getOperand(0), combinedConst);
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+                             IndirectArg, combinedValue);
+        }
       }
     }
     break;
   }
@@ -3064,14 +2851,14 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       // (any_extend (SPUextract_elt0 <arg>)) ->
       // (SPUextract_elt0 <arg>)
       // Types must match, however...
-#if defined(NDEBUG)
-      // if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-      cerr << "\nReplace: ";
+#if !defined(NDEBUG)
+      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+        errs() << "\nReplace: ";
         N->dump(&DAG);
-      cerr << "\nWith:    ";
+        errs() << "\nWith:    ";
         Op0.getNode()->dump(&DAG);
-      cerr << "\n";
-
+        errs() << "\n";
+      }
 #endif
 
       return Op0;
     }
     break;
   }
@@ -3080,50 +2867,64 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   case SPUISD::IndirectAddr: {
     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
-      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
-      if (CN->getZExtValue() == 0) {
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+      if (CN != 0 && CN->getZExtValue() == 0) {
         // (SPUindirect (SPUaform <addr>, 0), 0) ->
         // (SPUaform <addr>, 0)
 
-        DEBUG(cerr << "Replace: ");
+        DEBUG(errs() << "Replace: ");
         DEBUG(N->dump(&DAG));
-        DEBUG(cerr << "\nWith:    ");
+        DEBUG(errs() << "\nWith:    ");
         DEBUG(Op0.getNode()->dump(&DAG));
-        DEBUG(cerr << "\n");
+        DEBUG(errs() << "\n");
 
         return Op0;
       }
+    } else if (Op0.getOpcode() == ISD::ADD) {
+      SDValue Op1 = N->getOperand(1);
+      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+        // (SPUindirect (add <arg>, <arg>), 0) ->
+        // (SPUindirect <arg>, <arg>)
+        if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            errs() << "\n"
+                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+                   << "With:    (SPUindirect <arg>, <arg>)\n";
+          }
+#endif
+
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+                             Op0.getOperand(0), Op0.getOperand(1));
+        }
+      }
     }
     break;
   }
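
Stripped of the DAG plumbing, the ADD and IndirectAddr combines above are ordinary base-plus-offset arithmetic: adding zero is dropped, and constants are folded into the indirect address's offset. A toy version over an invented base/offset address node:

#include <cassert>
#include <cstdint>

// Toy stand-in for (SPUindirect <arg>, <const>): a base id plus an offset.
struct IndirectAddr { int base; int64_t offset; };

// (add (SPUindirect <arg>, <const>), <const>) ->
// (SPUindirect <arg>, <const + const>), and adding 0 is dropped entirely --
// the two cases the ADD combine handles.
static IndirectAddr combineAdd(IndirectAddr addr, int64_t c) {
  if (c == 0)
    return addr;      // (add x, 0) -> x
  addr.offset += c;   // fold the constants together
  return addr;
}

int main() {
  IndirectAddr a{/*base=*/1, /*offset=*/16};
  assert(combineAdd(a, 0).offset == 16);
  assert(combineAdd(a, 32).offset == 48);
  return 0;
}
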
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
-  case SPUISD::ROTQUAD_RZ_BYTES:
-  case SPUISD::ROTQUAD_RZ_BITS: {
+  case SPUISD::ROTBYTES_LEFT: {
     SDValue Op1 = N->getOperand(1);
 
-    if (isa<ConstantSDNode>(Op1)) {
-      // Kill degenerate vector shifts:
-      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-      if (CN->getZExtValue() == 0) {
+    // Kill degenerate vector shifts:
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
-  case SPUISD::PROMOTE_SCALAR: {
+  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
-      // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
       // <arg>
-      // but only if the SPUpromote_scalar and <arg> types match.
+      // but only if the SPUprefslot2vec and <arg> types match.
       SDValue Op00 = Op0.getOperand(0);
       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
         SDValue Op000 = Op00.getOperand(0);
@@ -3134,7 +2935,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       break;
     }
     case SPUISD::VEC2PREFSLOT: {
-      // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
+      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
       // <arg>
       Result = Op0.getOperand(0);
       break;
@@ -3143,14 +2944,15 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     break;
   }
   }
+
   // Otherwise, return unchanged.
 #ifndef NDEBUG
   if (Result.getNode()) {
-    DEBUG(cerr << "\nReplace.SPU: ");
+    DEBUG(errs() << "\nReplace.SPU: ");
     DEBUG(N->dump(&DAG));
-    DEBUG(cerr << "\nWith:        ");
+    DEBUG(errs() << "\nWith:        ");
     DEBUG(Result.getNode()->dump(&DAG));
-    DEBUG(cerr << "\n");
+    DEBUG(errs() << "\n");
   }
 #endif
 
@@ -3181,7 +2983,7 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
 std::pair<unsigned, const TargetRegisterClass*>
 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                MVT VT) const
+                                                EVT VT) const
 {
   if (Constraint.size() == 1) {
     // GCC RS6000 Constraint Letters
@@ -3214,62 +3016,45 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth ) const {
 #if 0
-  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
-#endif
+  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
 
   switch (Op.getOpcode()) {
   default:
     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
     break;
-
-#if 0
   case CALL:
   case SHUFB:
   case SHUFFLE_MASK:
   case CNTB:
-#endif
-
-  case SPUISD::PROMOTE_SCALAR: {
-    SDValue Op0 = Op.getOperand(0);
-    MVT Op0VT = Op0.getValueType();
-    unsigned Op0VTBits = Op0VT.getSizeInBits();
-    uint64_t InMask = Op0VT.getIntegerVTBitMask();
-    KnownZero |= APInt(Op0VTBits, ~InMask, false);
-    KnownOne |= APInt(Op0VTBits, InMask, false);
-    break;
-  }
-
+  case SPUISD::PREFSLOT2VEC:
   case SPUISD::LDRESULT:
-  case SPUISD::VEC2PREFSLOT: {
-    MVT OpVT = Op.getValueType();
-    unsigned OpVTBits = OpVT.getSizeInBits();
-    uint64_t InMask = OpVT.getIntegerVTBitMask();
-    KnownZero |= APInt(OpVTBits, ~InMask, false);
-    KnownOne |= APInt(OpVTBits, InMask, false);
-    break;
-  }
-
-#if 0
-  case MPY:
-  case MPYU:
-  case MPYH:
-  case MPYHH:
+  case SPUISD::VEC2PREFSLOT:
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
   case SPUISD::VEC_ROTL:
   case SPUISD::VEC_ROTR:
-  case SPUISD::ROTQUAD_RZ_BYTES:
-  case SPUISD::ROTQUAD_RZ_BITS:
   case SPUISD::ROTBYTES_LEFT:
   case SPUISD::SELECT_MASK:
   case SPUISD::SELB:
-  case SPUISD::FPInterp:
-  case SPUISD::FPRecipEst:
-  case SPUISD::SEXT32TO64:
+  }
 #endif
+}
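
The remaining combines are peepholes of the same flavor: a quadword shift by zero is its input, and a prefslot2vec/vec2prefslot round trip cancels. A toy tree rewriter showing both rewrites (all types here are invented for illustration):

#include <cassert>

enum Opcode { Value, Prefslot2Vec, Vec2Prefslot, ShiftBytes };

struct Node { Opcode op; const Node *op0; int amount; };

// Two of the PerformDAGCombine peepholes, on a toy expression tree:
//   (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> <arg>
//   a quadword shift by zero                  -> its input
static const Node *combine(const Node *n) {
  if (n->op == Prefslot2Vec && n->op0->op == Vec2Prefslot)
    return n->op0->op0;   // inverse pair cancels
  if (n->op == ShiftBytes && n->amount == 0)
    return n->op0;        // kill the degenerate shift
  return n;
}

int main() {
  Node v{Value, nullptr, 0};
  Node extract{Vec2Prefslot, &v, 0};
  Node promote{Prefslot2Vec, &extract, 0};
  assert(combine(&promote) == &v);
  Node shift{ShiftBytes, &v, 0};
  assert(combine(&shift) == &v);
  return 0;
}
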
MVT::i32; + } + return VT.getSizeInBits(); + } } }