X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTarget%2FCellSPU%2FSPUISelLowering.cpp;h=50ba00f63675302c94f8b4922e2b742b9384c66e;hb=e53a600f065075731d0aeb9dc8f4f3d75f5a05f8;hp=184f0b23fa54bfd009401b0fe595ef8b143335f4;hpb=aaffa05d0a652dd3eae76a941d02d6b0469fa821;p=oota-llvm.git diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 184f0b23fa5..50ba00f6367 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1,5 +1,5 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -15,20 +15,23 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" -#include "llvm/ADT/VectorExtras.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" - +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -68,52 +71,61 @@ namespace { #ifndef NDEBUG if (retval == 0) { - cerr << "getValueTypeMapEntry returns NULL for " - << VT.getMVTString() - << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "getValueTypeMapEntry returns NULL for " + << VT.getMVTString(); + llvm_report_error(Msg.str()); } #endif return retval; } - //! Predicate that returns true if operand is a memory target + //! Expand a library call into an actual call DAG node /*! - \arg Op Operand to test - \return true if the operand is a memory target (i.e., global - address, external symbol, constant pool) or an A-form - address. + \note + This code is taken from SelectionDAGLegalize, since it is not exposed as + part of the LLVM SelectionDAG API. */ - bool isMemoryOperand(const SDValue &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::GlobalAddress - || Opc == ISD::GlobalTLSAddress - || Opc == ISD::JumpTable - || Opc == ISD::ConstantPool - || Opc == ISD::ExternalSymbol - || Opc == ISD::TargetGlobalAddress - || Opc == ISD::TargetGlobalTLSAddress - || Opc == ISD::TargetJumpTable - || Opc == ISD::TargetConstantPool - || Opc == ISD::TargetExternalSymbol - || Opc == SPUISD::AFormAddr); - } - - //! Predicate that returns true if the operand is an indirect target - bool isIndirectOperand(const SDValue &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::Register - || Opc == SPUISD::LDRESULT); + + SDValue + ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, + bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. 
+ SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + MVT ArgVT = Op.getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForMVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = + Op.getNode()->getValueType(0).getTypeForMVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, CallingConv::C, false, Callee, Args, DAG, + Op.getDebugLoc()); + + return CallInfo.first; } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM), - SPUTM(TM) -{ + : TargetLowering(TM, new TargetLoweringObjectFileELF()), + SPUTM(TM) { // Fold away setcc operations if possible. setPow2DivIsCheap(); @@ -121,6 +133,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); + // Set RTLIB libcall names as used by SPU: + setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); + // Set up the SPU's register classes: addRegisterClass(MVT::i8, SPU::R8CRegisterClass); addRegisterClass(MVT::i16, SPU::R16CRegisterClass); @@ -130,43 +145,54 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); - // Initialize libcalls: - setLibcallName(RTLIB::MUL_I64, "__muldi3"); - // SPU has no sign or zero extended loads for i1, i8, i16: setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); - setTruncStoreAction(MVT::i8, MVT::i8, Custom); - setTruncStoreAction(MVT::i16, MVT::i8, Custom); - setTruncStoreAction(MVT::i32, MVT::i8, Custom); - setTruncStoreAction(MVT::i64, MVT::i8, Custom); - setTruncStoreAction(MVT::i128, MVT::i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + + setTruncStoreAction(MVT::i128, MVT::i64, Expand); + setTruncStoreAction(MVT::i128, MVT::i32, Expand); + setTruncStoreAction(MVT::i128, MVT::i16, Expand); + setTruncStoreAction(MVT::i128, MVT::i8, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // SPU constant load actions are custom lowered: - setOperationAction(ISD::Constant, MVT::i64, Custom); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); // SPU's loads and stores have to be custom lowered: - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; ++sctype) { MVT VT = (MVT::SimpleValueType)sctype; - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + 
setLoadExtAction(ISD::EXTLOAD, VT, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { + MVT StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } } - // Custom lower BRCOND for i8 to "promote" the result to i16 - setOperationAction(ISD::BRCOND, MVT::Other, Custom); + for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; + ++sctype) { + MVT VT = (MVT::SimpleValueType) sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { + MVT StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } + } // Expand the jumptable branches setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -177,18 +203,42 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -#if 0 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); -#endif // SPU has no intrinsics for these particular operations: setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // SPU has no division/remainder instructions + setOperationAction(ISD::SREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i16, Expand); + setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::SDIVREM, MVT::i16, Expand); + setOperationAction(ISD::UDIVREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i128, Expand); + setOperationAction(ISD::UREM, MVT::i128, Expand); + setOperationAction(ISD::SDIV, MVT::i128, Expand); + setOperationAction(ISD::UDIV, MVT::i128, Expand); + setOperationAction(ISD::SDIVREM, MVT::i128, Expand); + setOperationAction(ISD::UDIVREM, MVT::i128, Expand); // We don't support sin/cos/sqrt/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -198,7 +248,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); - // If we're enabling GP optimizations, use hardware square root + // Expand fsqrt to 
the appropriate libcall (NOTE: should use h/w fsqrt + // for f32!) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); @@ -223,26 +274,40 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SRL, MVT::i8, Custom); setOperationAction(ISD::SRA, MVT::i8, Custom); - // SPU needs custom lowering for shift left/right for i64 - setOperationAction(ISD::SHL, MVT::i64, Custom); - setOperationAction(ISD::SRL, MVT::i64, Custom); - setOperationAction(ISD::SRA, MVT::i64, Custom); + // Make these operations legal and handle them during instruction selection: + setOperationAction(ISD::SHL, MVT::i64, Legal); + setOperationAction(ISD::SRL, MVT::i64, Legal); + setOperationAction(ISD::SRA, MVT::i64, Legal); // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i32, Custom); - setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall - - // SMUL_LOHI, UMUL_LOHI - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MUL, MVT::i64, Legal); + + // Expand double-width multiplication + // FIXME: It would probably be reasonable to support some of these operations + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); // Need to custom handle (some) common i8, i64 math ops - setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::ADD, MVT::i8, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Legal); // SPU does not have BSWAP. It does have i32 support CTLZ. // CTPOP has to be custom lowered. 
@@ -253,47 +318,59 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i16, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i128, Expand); + setOperationAction(ISD::CTTZ , MVT::i8, Expand); + setOperationAction(ISD::CTTZ , MVT::i16, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i64, Expand); + setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ , MVT::i8, Promote); + setOperationAction(ISD::CTLZ , MVT::i16, Promote); setOperationAction(ISD::CTLZ , MVT::i32, Legal); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); + setOperationAction(ISD::CTLZ , MVT::i128, Expand); // SPU has a version of select that implements (a&~c)|(b&c), just like // select ought to work: setOperationAction(ISD::SELECT, MVT::i8, Legal); setOperationAction(ISD::SELECT, MVT::i16, Legal); setOperationAction(ISD::SELECT, MVT::i32, Legal); - setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Legal); setOperationAction(ISD::SETCC, MVT::i8, Legal); setOperationAction(ISD::SETCC, MVT::i16, Legal); setOperationAction(ISD::SETCC, MVT::i32, Legal); - setOperationAction(ISD::SETCC, MVT::i64, Expand); - - // Zero extension and sign extension for i64 have to be - // custom legalized - setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom); - - // SPU has a legal FP -> signed INT instruction - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + // Custom lower i128 -> i64 truncates + setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + // SPU has a legal FP -> signed INT instruction for f32, but for f64, need + // to expand to a libcall, hence the custom lowering: + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f32, Custom); - //setOperationAction(ISD::FDIV, MVT::f64, Custom); + setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall - // SPU has [U|S]INT_TO_FP - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - 
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); @@ -315,9 +392,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) ++sctype) { MVT VT = (MVT::SimpleValueType)sctype; - setOperationAction(ISD::GlobalAddress, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); } // RET must be custom lowered, to meet ABI requirements @@ -354,29 +431,31 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + // "Odd size" vector classes that we're willing to support: + addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); // mul has to be custom lowered. - setOperationAction(ISD::MUL , VT, Custom); + setOperationAction(ISD::MUL, VT, Legal); - setOperationAction(ISD::AND , VT, Legal); - setOperationAction(ISD::OR , VT, Legal); - setOperationAction(ISD::XOR , VT, Legal); - setOperationAction(ISD::LOAD , VT, Legal); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::SELECT, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::FDIV, VT, Custom); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); // Custom lower build_vector, constant pool spills, insert and // extract vector elements: @@ -388,14 +467,15 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } - setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::AND, MVT::v16i8, Custom); setOperationAction(ISD::OR, MVT::v16i8, Custom); setOperationAction(ISD::XOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setShiftAmountType(MVT::i32); - setBooleanContents(ZeroOrOneBooleanContent); + setBooleanContents(ZeroOrNegativeOneBooleanContent); setStackPointerRegisterToSaveRestore(SPU::R1); @@ -407,8 +487,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) computeRegisterProperties(); - // Set other properties: - setSchedulingPreference(SchedulingForLatency); + // Set pre-RA register scheduler default to BURR, which produces slightly + // better code than the default (could also be TDRR, but 
TargetLowering.h + // needs a mod to support that model): + setSchedulingPreference(SchedulingForRegPressure); } const char * @@ -426,18 +508,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; + node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; - node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED] - = "SPUISD::VEC2PREFSLOT_CHAINED"; - node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT"; - node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT"; - node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT"; - node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT"; - node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; - node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; - node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; - node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; @@ -445,24 +517,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; - node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] = - "SPUISD::ROTQUAD_RZ_BYTES"; - node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = - "SPUISD::ROTQUAD_RZ_BITS"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; - node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] = - "SPUISD::ROTBYTES_LEFT_CHAINED"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = - "SPUISD::ROTBYTES_LEFT_BITS"; + "SPUISD::ROTBYTES_LEFT_BITS"; node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; - node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; - node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; - node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; - node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; - node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; - node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; - node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; + node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; + node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; + node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; } std::map::iterator i = node_names.find(Opcode); @@ -470,9 +532,18 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const return ((i != node_names.end()) ? i->second : 0); } -MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { - MVT VT = Op.getValueType(); - return (VT.isInteger() ? VT : MVT(MVT::i32)); +/// getFunctionAlignment - Return the Log2 alignment of this function. 
+unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { + return 3; +} + +//===----------------------------------------------------------------------===// +// Return the Cell SPU's SETCC result type +//===----------------------------------------------------------------------===// + +MVT SPUTargetLowering::getSetCCResultType(MVT VT) const { + // i16 and i32 are valid SETCC result types + return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32); } //===----------------------------------------------------------------------===// @@ -485,195 +556,158 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { // LowerOperation implementation //===----------------------------------------------------------------------===// -/// Aligned load common code for CellSPU -/*! - \param[in] Op The SelectionDAG load or store operand - \param[in] DAG The selection DAG - \param[in] ST CellSPU subtarget information structure - \param[in,out] alignment Caller initializes this to the load or store node's - value from getAlignment(), may be updated while generating the aligned load - \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned - offset (divisible by 16, modulo 16 == 0) - \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the - offset of the preferred slot (modulo 16 != 0) - \param[in,out] VT Caller initializes this value type to the the load or store - node's loaded or stored value type; may be updated if an i1-extended load or - store. - \param[out] was16aligned true if the base pointer had 16-byte alignment, - otherwise false. Can help to determine if the chunk needs to be rotated. - - Both load and store lowering load a block of data aligned on a 16-byte - boundary. This is the common aligned load code shared between both. - */ -static SDValue -AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST, - LSBaseSDNode *LSN, - unsigned &alignment, int &alignOffs, int &prefSlotOffs, - MVT &VT, bool &was16aligned) -{ - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - const valtype_map_s *vtm = getValueTypeMapEntry(VT); - SDValue basePtr = LSN->getBasePtr(); - SDValue chain = LSN->getChain(); - - if (basePtr.getOpcode() == ISD::ADD) { - SDValue Op1 = basePtr.getNode()->getOperand(1); - - if (Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) { - const ConstantSDNode *CN = cast(basePtr.getOperand(1)); - - alignOffs = (int) CN->getZExtValue(); - prefSlotOffs = (int) (alignOffs & 0xf); - - // Adjust the rotation amount to ensure that the final result ends up in - // the preferred slot: - prefSlotOffs -= vtm->prefslot_byte; - basePtr = basePtr.getOperand(0); - - // Loading from memory, can we adjust alignment? 
- if (basePtr.getOpcode() == SPUISD::AFormAddr) { - SDValue APtr = basePtr.getOperand(0); - if (APtr.getOpcode() == ISD::TargetGlobalAddress) { - GlobalAddressSDNode *GSDN = cast(APtr); - alignment = GSDN->getGlobal()->getAlignment(); - } - } - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - } else if (basePtr.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast(basePtr); - alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize()); - prefSlotOffs = (int) (alignOffs & 0xf); - prefSlotOffs -= vtm->prefslot_byte; - basePtr = DAG.getRegister(SPU::R1, VT); - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - - if (alignment == 16) { - // Realign the base pointer as a D-Form address: - if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { - basePtr = DAG.getNode(ISD::ADD, PtrVT, - basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - } - - // Emit the vector load: - was16aligned = true; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); - } - - // Unaligned load or we're using the "large memory" model, which means that - // we have to be very pessimistic: - if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Add the offset - basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - was16aligned = false; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); -} - /// Custom lower loads for CellSPU /*! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements within a 16-byte block, we have to rotate to extract the requested element. - */ + + For extending loads, we also want to ensure that the following sequence is + emitted, e.g. 
for MVT::f32 extending load to MVT::f64: + +\verbatim +%1 v16i8,ch = load +%2 v16i8,ch = rotate %1 +%3 v4f8, ch = bitconvert %2 +%4 f32 = vec2perfslot %3 +%5 f64 = fp_extend %4 +\endverbatim +*/ static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast(Op); SDValue the_chain = LN->getChain(); - MVT VT = LN->getMemoryVT(); - MVT OpVT = Op.getNode()->getValueType(0); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + MVT InVT = LN->getMemoryVT(); + MVT OutVT = Op.getValueType(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - SDValue Ops[8]; + const valtype_map_s *vtm = getValueTypeMapEntry(InVT); + DebugLoc dl = Op.getDebugLoc(); switch (LN->getAddressingMode()) { case ISD::UNINDEXED: { - int offset, rotamt; - bool was16aligned; - SDValue result = - AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned); - - if (result.getNode() == 0) - return result; - - the_chain = result.getValue(1); - // Rotate the chunk if necessary - if (rotamt < 0) - rotamt += 16; - if (rotamt != 0 || !was16aligned) { - SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); - - Ops[0] = the_chain; - Ops[1] = result; - if (was16aligned) { - Ops[2] = DAG.getConstant(rotamt, MVT::i16); + SDValue result; + SDValue basePtr = LN->getBasePtr(); + SDValue rotate; + + if (alignment == 16) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and the rotation amount: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast (basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte); + + if (rotamt < 0) + rotamt += 16; + + rotate = DAG.getConstant(rotamt, MVT::i16); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) + || (basePtr.getOpcode() == SPUISD::IndirectAddr + && basePtr.getOperand(0).getOpcode() == SPUISD::Hi + && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { + // Plain aligned a-form address: rotate into preferred slot + // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) + int64_t rotamt = -vtm->prefslot_byte; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getConstant(rotamt, MVT::i16); } else { - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - LoadSDNode *LN1 = cast(result); - Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(), + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + int64_t rotamt = -vtm->prefslot_byte; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, DAG.getConstant(rotamt, PtrVT)); } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa(Op1)) { + // Convert the (add , ) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. 
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add , ) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); - the_chain = result.getValue(1); + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(-vtm->prefslot_byte, PtrVT)); } - if (VT == OpVT || ExtType == ISD::EXTLOAD) { - SDVTList scalarvts; - MVT vecVT = MVT::v16i8; + // Re-emit as a v16i8 vector load + result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); - // Convert the loaded v16i8 vector to the appropriate vector type - // specified by the operand: - if (OpVT == VT) { - if (VT != MVT::i1) - vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); - } else - vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); + // Update the chain + the_chain = result.getValue(1); - Ops[0] = the_chain; - Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result); - scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other); - result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2); - the_chain = result.getValue(1); - } else { - // Handle the sign and zero-extending loads for i1 and i8: - unsigned NewOpC; + // Rotate into the preferred slot: + result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8, + result.getValue(0), rotate); - if (ExtType == ISD::SEXTLOAD) { - NewOpC = (OpVT == MVT::i1 - ? SPUISD::EXTRACT_I1_SEXT - : SPUISD::EXTRACT_I8_SEXT); - } else { - assert(ExtType == ISD::ZEXTLOAD); - NewOpC = (OpVT == MVT::i1 - ? 
SPUISD::EXTRACT_I1_ZEXT - : SPUISD::EXTRACT_I8_ZEXT); - } + // Convert the loaded v16i8 vector to the appropriate vector type + // specified by the operand: + MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits())); + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, + DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result)); - result = DAG.getNode(NewOpC, OpVT, result); + // Handle extending loads by extending the scalar result: + if (ExtType == ISD::SEXTLOAD) { + result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); + } else if (ExtType == ISD::ZEXTLOAD) { + result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); + } else if (ExtType == ISD::EXTLOAD) { + unsigned NewOpc = ISD::ANY_EXTEND; + + if (OutVT.isFloatingPoint()) + NewOpc = ISD::FP_EXTEND; + + result = DAG.getNode(NewOpc, dl, OutVT, result); } - SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); + SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); SDValue retops[2] = { result, the_chain }; - result = DAG.getNode(SPUISD::LDRESULT, retvts, + result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, retops, sizeof(retops) / sizeof(retops[0])); return result; } @@ -682,11 +716,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_INC: case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " "UNINDEXED\n"; - cerr << (unsigned) LN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ + Msg << (unsigned) LN->getAddressingMode(); + llvm_report_error(Msg.str()); + /*NOTREACHED*/ + } } return SDValue(); @@ -705,27 +743,91 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT VT = Value.getValueType(); MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); unsigned alignment = SN->getAlignment(); switch (SN->getAddressingMode()) { case ISD::UNINDEXED: { - int chunk_offset, slot_offset; - bool was16aligned; - // The vector type we really want to load from the 16-byte chunk. 
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())), stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits())); - SDValue alignLoadVec = - AlignedLoad(Op, DAG, ST, SN, alignment, - chunk_offset, slot_offset, VT, was16aligned); + SDValue alignLoadVec; + SDValue basePtr = SN->getBasePtr(); + SDValue the_chain = SN->getChain(); + SDValue insertEltOffs; + + if (alignment == 16) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and insertion byte: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast(basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & 0xf), PtrVT)); + + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else { + // Otherwise, assume it's at byte 0 of basePtr + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa(Op1)) { + // Convert the (add , ) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add , ) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Insertion point is solely determined by basePtr's contents + insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - if (alignLoadVec.getNode() == 0) - return alignLoadVec; + // Re-emit as a v16i8 vector load + alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + SN->getSrcValue(), SN->getSrcValueOffset(), + SN->isVolatile(), 16); + + // Update the chain + the_chain = alignLoadVec.getValue(1); LoadSDNode *LN = cast(alignLoadVec); - SDValue basePtr = LN->getBasePtr(); - SDValue the_chain = alignLoadVec.getValue(1); SDValue theValue = SN->getValue(); SDValue result; @@ -737,40 +839,33 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { theValue = theValue.getOperand(0); } - chunk_offset &= 0xf; - - SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT); - SDValue insertEltPtr; - // If the base pointer is already a D-form address, then just create // a new D-form address with a slot offset and the orignal base pointer. // Otherwise generate a D-form address with the slot offset relative // to the stack pointer, which is always aligned. 
- DEBUG(cerr << "CellSPU LowerSTORE: basePtr = "); - DEBUG(basePtr.getNode()->dump(&DAG)); - DEBUG(cerr << "\n"); - - if (basePtr.getOpcode() == SPUISD::IndirectAddr || - (basePtr.getOpcode() == ISD::ADD - && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) { - insertEltPtr = basePtr; - } else { - insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs); - } +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + cerr << "CellSPU LowerSTORE: basePtr = "; + basePtr.getNode()->dump(&DAG); + cerr << "\n"; + } +#endif SDValue insertEltOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr); + DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); SDValue vectorizeOp = - DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue); + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); - result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec, - DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp)); + result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, + vectorizeOp, alignLoadVec, + DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::v4i32, insertEltOp)); - result = DAG.getStore(the_chain, result, basePtr, + result = DAG.getStore(the_chain, dl, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), LN->isVolatile(), LN->getAlignment()); -#if 0 && defined(NDEBUG) +#if 0 && !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { const SDValue ¤tRoot = DAG.getRoot(); @@ -781,7 +876,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DAG.setRoot(currentRoot); } #endif - + return result; /*UNREACHED*/ } @@ -790,18 +885,22 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_INC: case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " "UNINDEXED\n"; - cerr << (unsigned) SN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ + Msg << (unsigned) SN->getAddressingMode(); + llvm_report_error(Msg.str()); + /*NOTREACHED*/ + } } return SDValue(); } -/// Generate the address of a constant pool entry. -static SDValue +//! Generate the address of a constant pool entry. +SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); @@ -809,24 +908,31 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { // Just return the SDValue with the constant pool address in it. 
- return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } - assert(0 && - "LowerConstantPool: Relocation model other than static" - " not supported."); + llvm_unreachable("LowerConstantPool: Relocation model other than static" + " not supported."); return SDValue(); } +//! Alternate entry point for generating the address of a constant pool entry +SDValue +SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { + return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); +} + static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT PtrVT = Op.getValueType(); @@ -834,19 +940,21 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } - assert(0 && - "LowerJumpTable: Relocation model other than static not supported."); + llvm_unreachable("LowerJumpTable: Relocation model other than static" + " not supported."); return SDValue(); } @@ -858,44 +966,20 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - cerr << "LowerGlobalAddress: Relocation model other than static not " - << "supported.\n"; - abort(); - /*NOTREACHED*/ - } - - return SDValue(); -} - -//! Custom lower i64 integer constants -/*! - This code inserts all of the necessary juggling that needs to occur to load - a 64-bit constant into a register. 
- */ -static SDValue -LowerConstant(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - ConstantSDNode *CN = cast(Op.getNode()); - - if (VT == MVT::i64) { - SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64); - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); - } else { - cerr << "LowerConstant: unhandled constant type " - << VT.getMVTString() - << "\n"; - abort(); + llvm_report_error("LowerGlobalAddress: Relocation model other than static" + "not supported."); /*NOTREACHED*/ } @@ -906,38 +990,25 @@ LowerConstant(SDValue Op, SelectionDAG &DAG) { static SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); - ConstantFPSDNode *FP = cast(Op.getNode()); - - assert((FP != 0) && - "LowerConstantFP: Node is not ConstantFPSDNode"); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (VT == MVT::f64) { + ConstantFPSDNode *FP = cast(Op.getNode()); + + assert((FP != 0) && + "LowerConstantFP: Node is not ConstantFPSDNode"); + uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); - return DAG.getNode(ISD::BIT_CONVERT, VT, - LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG)); + SDValue T = DAG.getConstant(dbits, MVT::i64); + SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec)); } return SDValue(); } -//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16) -static SDValue -LowerBRCOND(SDValue Op, SelectionDAG &DAG) -{ - SDValue Cond = Op.getOperand(1); - MVT CondVT = Cond.getValueType(); - MVT CondNVT; - - if (CondVT == MVT::i8) { - CondNVT = MVT::i16; - return DAG.getNode(ISD::BRCOND, Op.getValueType(), - Op.getOperand(0), - DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)), - Op.getOperand(2)); - } else - return SDValue(); // Unchanged -} - static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) { @@ -947,6 +1018,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) SmallVector ArgValues; SDValue Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; + DebugLoc dl = Op.getDebugLoc(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); @@ -969,10 +1041,11 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) switch (ObjectVT.getSimpleVT()) { default: { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << ObjectVT.getMVTString() - << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LowerFORMAL_ARGUMENTS Unhandled argument type: " + << ObjectVT.getMVTString(); + llvm_report_error(Msg.str()); } case MVT::i8: ArgRegClass = &SPU::R8CRegClass; @@ -986,6 +1059,9 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) case MVT::i64: ArgRegClass = &SPU::R64CRegClass; break; + case MVT::i128: + ArgRegClass = &SPU::GPRCRegClass; + break; case MVT::f32: ArgRegClass = &SPU::R32FPRegClass; break; @@ -1004,7 +1080,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); ++ArgRegIdx; } else { // We need to load the argument to a virtual register if we 
determined @@ -1012,7 +1088,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0); ArgOffset += StackSlotSize; } @@ -1033,7 +1109,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0); + SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0); Root = Store.getOperand(0); MemOps.push_back(Store); @@ -1041,13 +1117,14 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) ArgOffset += StackSlotSize; } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size()); + Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); } ArgValues.push_back(Root); // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(), + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), &ArgValues[0], ArgValues.size()); } @@ -1065,8 +1142,7 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); } -static -SDValue +static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { CallSDNode *TheCall = cast(Op.getNode()); SDValue Chain = TheCall->getChain(); @@ -1075,6 +1151,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + DebugLoc dl = TheCall->getDebugLoc(); // Handy pointer type MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1105,17 +1182,19 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. 
SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); switch (Arg.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected ValueType for argument!"); + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i8: + case MVT::i16: case MVT::i32: case MVT::i64: case MVT::i128: if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); ArgOffset += StackSlotSize; } break; @@ -1124,10 +1203,12 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); ArgOffset += StackSlotSize; } break; + case MVT::v2i64: + case MVT::v2f64: case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: @@ -1135,7 +1216,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); ArgOffset += StackSlotSize; } break; @@ -1149,7 +1230,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (!MemOpChains.empty()) { // Adjust the stack pointer for the stack arguments. - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); } @@ -1157,8 +1238,8 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -1184,18 +1265,27 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // This may be an unsafe assumption for JIT and really large compilation // units. 
if (GV->isDeclaration()) { - Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero); + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero); + Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); } } else { // "Large memory" mode: Turn all calls into indirect calls with a X-form // address pairs: - Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); } - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + MVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + + if (!ST->usingLargeMem()) { + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); + } else { + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); + } + } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { // If this is an absolute destination address that appears to be a legal // local store address, use the munged value. Callee = SDValue(Dest, 0); @@ -1213,7 +1303,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (InFlag.getNode()) Ops.push_back(InFlag); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag), + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), &Ops[0], Ops.size()); InFlag = Chain.getValue(1); @@ -1227,40 +1317,50 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // If the call has results, copy the values out of the ret val registers. 
switch (TheCall->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected ret value!"); + default: llvm_unreachable("Unexpected ret value!"); case MVT::Other: break; case MVT::i32: if (TheCall->getValueType(1) == MVT::i32) { - Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, + MVT::i32, InFlag).getValue(1); ResultVals[0] = Chain.getValue(0); - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, Chain.getValue(2)).getValue(1); ResultVals[1] = Chain.getValue(0); NumResults = 2; } else { - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, + InFlag).getValue(1); ResultVals[0] = Chain.getValue(0); NumResults = 1; } break; case MVT::i64: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, + InFlag).getValue(1); + ResultVals[0] = Chain.getValue(0); + NumResults = 1; + break; + case MVT::i128: + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, + InFlag).getValue(1); ResultVals[0] = Chain.getValue(0); NumResults = 1; break; case MVT::f32: case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0), + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0), InFlag).getValue(1); ResultVals[0] = Chain.getValue(0); NumResults = 1; break; case MVT::v2f64: + case MVT::v2i64: case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0), + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0), InFlag).getValue(1); ResultVals[0] = Chain.getValue(0); NumResults = 1; @@ -1273,7 +1373,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Otherwise, merge everything together with a MERGE_VALUES node. 
ResultVals[NumResults++] = Chain; - SDValue Res = DAG.getMergeValues(ResultVals, NumResults); + SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl); return Res.getValue(Op.getResNo()); } @@ -1282,7 +1382,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - CCState CCInfo(CC, isVarArg, TM, RVLocs); + DebugLoc dl = Op.getDebugLoc(); + CCState CCInfo(CC, isVarArg, TM, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU); // If this is the first return lowered for this function, add the regs to the @@ -1299,14 +1400,15 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Op.getOperand(i*2+1), Flag); Flag = Chain.getValue(1); } if (Flag.getNode()) - return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag); + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); else - return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain); + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); } @@ -1333,7 +1435,7 @@ getVecImm(SDNode *N) { } } - return 0; // All UNDEF: use implicit def.; not Constant node + return 0; } /// get_vec_i18imm - Test if this vector is a vector filled with the same value @@ -1460,269 +1562,182 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// If this is a vector of constants or undefs, get the bits. A bit in -// UndefBits is set if the corresponding element of the vector is an -// ISD::UNDEF value. For undefs, the corresponding VectorBits values are -// zero. Return true if this is not an array of constants, false if it is. -// -static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], - uint64_t UndefBits[2]) { - // Start with zero'd results. - VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; - - unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue OpVal = BV->getOperand(i); - - unsigned PartNo = i >= e/2; // In the upper 128 bits? - unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. - - uint64_t EltBits = 0; - if (OpVal.getOpcode() == ISD::UNDEF) { - uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); - UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); - continue; - } else if (ConstantSDNode *CN = dyn_cast(OpVal)) { - EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize)); - } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { - const APFloat &apf = CN->getValueAPF(); - EltBits = (CN->getValueType(0) == MVT::f32 - ? FloatToBits(apf.convertToFloat()) - : DoubleToBits(apf.convertToDouble())); - } else { - // Nonconstant element. - return true; - } - - VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); - } +//! 
Lower a BUILD_VECTOR instruction creatively: +SDValue +LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); + unsigned minSplatBits = EltVT.getSizeInBits(); - //printf("%llx %llx %llx %llx\n", - // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); - return false; -} + if (minSplatBits < 16) + minSplatBits = 16; -/// If this is a splat (repetition) of a value across the whole vector, return -/// the smallest size that splats it. For example, "0x01010101010101..." is a -/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and -/// SplatSize = 1 byte. -static bool isConstantSplat(const uint64_t Bits128[2], - const uint64_t Undef128[2], - int MinSplatBits, - uint64_t &SplatBits, uint64_t &SplatUndef, - int &SplatSize) { - // Don't let undefs prevent splats from matching. See if the top 64-bits are - // the same as the lower 64-bits, ignoring undefs. - uint64_t Bits64 = Bits128[0] | Bits128[1]; - uint64_t Undef64 = Undef128[0] & Undef128[1]; - uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); - uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); - uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); - uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); - - if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { - if (MinSplatBits < 64) { - - // Check that the top 32-bits are the same as the lower 32-bits, ignoring - // undefs. - if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { - if (MinSplatBits < 32) { - - // If the top 16-bits are different than the lower 16-bits, ignoring - // undefs, we have an i32 splat. - if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { - if (MinSplatBits < 16) { - // If the top 8-bits are different than the lower 8-bits, ignoring - // undefs, we have an i16 splat. - if ((Bits16 & (uint16_t(~Undef16) >> 8)) - == ((Bits16 >> 8) & ~Undef16)) { - // Otherwise, we have an 8-bit splat. - SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); - SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); - SplatSize = 1; - return true; - } - } else { - SplatBits = Bits16; - SplatUndef = Undef16; - SplatSize = 2; - return true; - } - } - } else { - SplatBits = Bits32; - SplatUndef = Undef32; - SplatSize = 4; - return true; - } - } - } else { - SplatBits = Bits128[0]; - SplatUndef = Undef128[0]; - SplatSize = 8; - return true; - } - } + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; - return false; // Can't be a splat if two pieces don't match. -} + if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + || minSplatBits < SplatBitSize) + return SDValue(); // Wasn't a constant vector or splat exceeded min -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. If we CAN select this case, and if it -// selects to a single instruction, return Op. Otherwise, if we can codegen -// this case more efficiently than a constant pool load, lower it to the -// sequence of ops that should be used. -static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - // If this is a vector of constants or undefs, get the bits. 
A bit in - // UndefBits is set if the corresponding element of the vector is an - // ISD::UNDEF value. For undefs, the corresponding VectorBits values are - // zero. - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits) - || !isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) - return SDValue(); // Not a constant vector, not a splat. + uint64_t SplatBits = APSplatBits.getZExtValue(); switch (VT.getSimpleVT()) { - default: + default: { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + << VT.getMVTString(); + llvm_report_error(Msg.str()); + /*NOTREACHED*/ + } case MVT::v4f32: { - uint32_t Value32 = SplatBits; - assert(SplatSize == 4 + uint32_t Value32 = uint32_t(SplatBits); + assert(SplatBitSize == 32 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(Value32, MVT::i32); - return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); break; } case MVT::v2f64: { - uint64_t f64val = SplatBits; - assert(SplatSize == 8 + uint64_t f64val = uint64_t(SplatBits); + assert(SplatBitSize == 64 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(f64val, MVT::i64); - return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); break; } case MVT::v16i8: { // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits | (SplatBits << 8); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) - Ops[i] = DAG.getConstant(Value16, MVT::i16); - return DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8)); + unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; + SmallVector Ops; + + Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); } case MVT::v8i16: { - unsigned short Value16; - if (SplatSize == 2) - Value16 = (unsigned short) (SplatBits & 0xffff); - else - Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); - SDValue T = DAG.getConstant(Value16, VT.getVectorElementType()); - SDValue Ops[8]; - for (int i = 0; i < 8; ++i) Ops[i] = T; - return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8); + unsigned short Value16 = SplatBits; + SDValue T = DAG.getConstant(Value16, EltVT); + SmallVector Ops; + + Ops.assign(8, T); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); } case MVT::v4i32: { - unsigned int Value = SplatBits; - SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); + } + case MVT::v2i32: { + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); } 
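Before the remaining cases, a minimal sketch of the splat query driving this switch; the vector value is illustrative, and per the isConstantSplat contract the reported unit is the smallest one no narrower than the requested minimum:

    // For <4 x i32> <0x01010101, 0x01010101, 0x01010101, 0x01010101>:
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (BCN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                             HasAnyUndefs, 16 /* minSplatBits */)) {
      // Reports SplatBits == 0x0101 with SplatBitSize == 16; the guard at
      // the top of LowerBUILD_VECTOR then rejects any splat whose unit is
      // wider than the minimum it asked for.
    }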
case MVT::v2i64: { - uint64_t val = SplatBits; - uint32_t upper = uint32_t(val >> 32); - uint32_t lower = uint32_t(val); - - if (upper == lower) { - // Magic constant that can be matched by IL, ILA, et. al. - SDValue Val = DAG.getTargetConstant(val, MVT::i64); - return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val); - } else { - SDValue LO32; - SDValue HI32; - SmallVector ShufBytes; - SDValue Result; - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. - - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000); - lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000); - - // Create lower vector if not a special pattern - if (!lower_special) { - SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } + return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); + } + } - // Create upper vector if not a special pattern - if (!upper_special) { - SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } + return SDValue(); +} - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... both upper and lower are special, so punt with - // a target constant: - SDValue Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, - Zero, Zero); - } +/*! + */ +SDValue +SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, + DebugLoc dl) { + uint32_t upper = uint32_t(SplatVal >> 32); + uint32_t lower = uint32_t(SplatVal); + + if (upper == lower) { + // Magic constant that can be matched by IL, ILA, et. al. + SDValue Val = DAG.getTargetConstant(upper, MVT::i32); + return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Val, Val, Val, Val)); + } else { + bool upper_special, lower_special; - for (int i = 0; i < 4; ++i) { - uint64_t val = 0; - for (int j = 0; j < 4; ++j) { - SDValue V; - bool process_upper, process_lower; - val <<= 8; - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val |= 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val |= 0xc0; - else if ((process_upper && upper == 0x80000000) - || (process_lower && lower == 0x80000000)) - val |= (j == 0 ? 0xe0 : 0x80); - } else - val |= i * 4 + j + ((i & 1) * 16); - } + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
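For reference, the shufb control-byte encoding that motivates the magic values used below (per the CBEA instruction set; only the special ranges matter here):

    // shufb result byte for a control byte c:
    //   0x00..0x1f           : byte c of the concatenated (rA:rB) input
    //   0x80..0xbf (10xxxxxx): constant 0x00
    //   0xc0..0xdf (110xxxxx): constant 0xff
    //   0xe0..0xff (111xxxxx): constant 0x80 (a lone sign bit)
    // Hence a mask word producing four zero bytes needs no register input:
    SDValue ZeroWord = DAG.getConstant(0x80808080, MVT::i32);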
+ + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Both upper and lower are special, lower to a constant pool load: + if (lower_special && upper_special) { + SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + SplatValCN, SplatValCN); + } + + SDValue LO32; + SDValue HI32; + SmallVector ShufBytes; + SDValue Result; + + // Create lower vector if not a special pattern + if (!lower_special) { + SDValue LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } + + // Create upper vector if not a special pattern + if (!upper_special) { + SDValue HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } - ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + + for (int i = 0; i < 4; ++i) { + uint64_t val = 0; + for (int j = 0; j < 4; ++j) { + SDValue V; + bool process_upper, process_lower; + val <<= 8; + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val |= 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val |= 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val |= (j == 0 ? 0xe0 : 0x80); + } else + val |= i * 4 + j + ((i & 1) * 16); } - return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); + ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); } - } - } - return SDValue(); + return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size())); + } } /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on @@ -1739,47 +1754,75 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { /// \note /// SPUISD::SHUFB is eventually selected as Cell's shufb instructions. static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); if (V2.getOpcode() == ISD::UNDEF) V2 = V1; // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have // to be monotonically increasing with one exception element. 
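Three illustrative v4i32 masks for the classification performed below; indices 4..7 name elements of V2, matching V2EltIdx0 == 4:

    int InsertLike[4] = {0, 1, 6, 3}; // monotonic 0,1,_,3 plus exactly one
                                      // element of V2: the insert (C*D) path
    int RotateLike[4] = {1, 2, 3, 0}; // consecutive with a single wrap:
                                      // lowered to SPUISD::ROTBYTES_LEFT
    int General[4]    = {2, 0, 1, 3}; // neither: the full SHUFB mask path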
- MVT EltVT = V1.getValueType().getVectorElementType(); + MVT VecVT = V1.getValueType(); + MVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; unsigned V2Elt = 0; unsigned V2EltIdx0 = 0; unsigned CurrElt = 0; + unsigned MaxElts = VecVT.getVectorNumElements(); + unsigned PrevElt = 0; + unsigned V0Elt = 0; bool monotonic = true; - if (EltVT == MVT::i8) + bool rotate = true; + + if (EltVT == MVT::i8) { V2EltIdx0 = 16; - else if (EltVT == MVT::i16) + } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; - else if (EltVT == MVT::i32) + } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; - else - assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); + } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { + V2EltIdx0 = 2; + } else + llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); - for (unsigned i = 0, e = PermMask.getNumOperands(); - EltsFromV2 <= 1 && monotonic && i != e; - ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); + for (unsigned i = 0; i != MaxElts; ++i) { + if (SVN->getMaskElt(i) < 0) + continue; + + unsigned SrcElt = SVN->getMaskElt(i); + + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + if (1 >= (++EltsFromV2)) { + V2Elt = (V2EltIdx0 - SrcElt) << 2; + } + } else if (CurrElt != SrcElt) { + monotonic = false; + } - if (SrcElt >= V2EltIdx0) { - ++EltsFromV2; - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } else if (CurrElt != SrcElt) { - monotonic = false; + ++CurrElt; } - ++CurrElt; + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { + PrevElt = SrcElt; + if (SrcElt == 0) + V0Elt = i; + } else { + rotate = false; + } + } else if (PrevElt == 0) { + // First time through, need to keep track of previous element + PrevElt = SrcElt; + } else { + // This isn't a rotation, takes elements from vector 2 + rotate = false; + } + } } if (EltsFromV2 == 1 && monotonic) { @@ -1790,41 +1833,42 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Initialize temporary register to 0 SDValue InitTempReg = - DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT)); + DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); // Copy register's contents as index in SHUFFLE_MASK: SDValue ShufMaskOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(), + DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32, DAG.getTargetConstant(V2Elt, MVT::i32), - DAG.getCopyFromReg(InitTempReg, VReg, PtrVT)); + DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT)); // Use shuffle mask in SHUFB synthetic instruction: - return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, + ShufMaskOp); + } else if (rotate) { + int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; + + return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), + V1, DAG.getConstant(rotamt, MVT::i16)); } else { // Convert the SHUFFLE_VECTOR mask's input element units to the // actual bytes. 
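A sketch of the expansion that follows, for v4i32 where BytesPerElement == 4; mask element 5 (element 1 of V2) contributes bytes 20..23 of the byte-level mask:

    for (unsigned j = 0; j < 4; ++j)
      ResultMask.push_back(DAG.getConstant(5 * 4 + j, MVT::i8)); // 20..23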
unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); - - for (unsigned j = 0; j < BytesPerElement; ++j) { - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i8)); - } + for (unsigned i = 0, e = MaxElts; i != e; ++i) { + unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); + + for (unsigned j = 0; j < BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, - &ResultMask[0], ResultMask.size()); - return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask); + SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, + &ResultMask[0], ResultMask.size()); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); } } static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); // Op0 = the scalar + DebugLoc dl = Op.getDebugLoc(); if (Op0.getNode()->getOpcode() == ISD::Constant) { // For a constant, build the appropriate constant vector, which will @@ -1837,8 +1881,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { // Create a constant vector: switch (Op.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected constant value type in " - "LowerSCALAR_TO_VECTOR"); + default: llvm_unreachable("Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; @@ -1851,214 +1895,30 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { for (size_t j = 0; j < n_copies; ++j) ConstVecValues.push_back(CValue); - return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(), + return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), &ConstVecValues[0], ConstVecValues.size()); } else { // Otherwise, copy the value from one register to another: switch (Op0.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); + default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: case MVT::f32: case MVT::f64: - return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0); + return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); } } return SDValue(); } -static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { - switch (Op.getValueType().getSimpleVT()) { - default: - cerr << "CellSPU: Unknown vector multiplication, got " - << Op.getValueType().getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - - case MVT::v4i32: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB); - SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA); - SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB); - SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1); - - return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2); - break; - } - - // Multiply two v8i16 vectors (pipeline friendly version): - // a) multiply lower halves, mask off upper 16-bit of 32-bit product - // b) multiply upper halves, 
rotate left by 16 bits (inserts 16 lower zeroes) - // c) Use SELB to select upper and lower halves from the intermediate results - // - // NOTE: We really want to move the SELECT_MASK to earlier to actually get the - // dual-issue. This code does manage to do this, even if it's a little on - // the wacky side - case MVT::v8i16: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SDValue Chain = Op.getOperand(0); - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - - SDValue FSMBOp = - DAG.getCopyToReg(Chain, FSMBIreg, - DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, - DAG.getConstant(0xcccc, MVT::i16))); - - SDValue HHProd = - DAG.getCopyToReg(FSMBOp, HiProdReg, - DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); - - SDValue HHProd_v4i32 = - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); - - return DAG.getNode(SPUISD::SELB, MVT::v8i16, - DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), - DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), - DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, - HHProd_v4i32, - DAG.getConstant(16, MVT::i16))), - DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); - } - - // This M00sE is N@stI! (apologies to Monty Python) - // - // SPU doesn't know how to do any 8-bit multiplication, so the solution - // is to break it all apart, sign extend, and reassemble the various - // intermediate products. - case MVT::v16i8: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - SDValue c8 = DAG.getConstant(8, MVT::i32); - SDValue c16 = DAG.getConstant(16, MVT::i32); - - SDValue LLProd = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); - - SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); - - SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); - - SDValue LHProd = - DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, - DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); - - SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, - DAG.getConstant(0x2222, MVT::i16)); - - SDValue LoProdParts = - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getNode(SPUISD::SELB, MVT::v8i16, - LLProd, LHProd, FSMBmask)); - - SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32); - - SDValue LoProd = - DAG.getNode(ISD::AND, MVT::v4i32, - LoProdParts, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LoProdMask, LoProdMask, - LoProdMask, LoProdMask)); - - SDValue rAH = - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); - - SDValue rBH = - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); - - SDValue HLProd = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); - - SDValue HHProd_1 = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, - DAG.getNode(SPUISD::VEC_SRA, - MVT::v4i32, rAH, c8)), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, - DAG.getNode(SPUISD::VEC_SRA, - MVT::v4i32, rBH, c8))); - - SDValue HHProd = - DAG.getNode(SPUISD::SELB, MVT::v8i16, - HLProd, - DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), - FSMBmask); - - SDValue HiProd = - DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16); - - 
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, - DAG.getNode(ISD::OR, MVT::v4i32, - LoProd, HiProd)); - } - } - - return SDValue(); -} - -static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - SDValue A = Op.getOperand(0); - SDValue B = Op.getOperand(1); - MVT VT = Op.getValueType(); - - unsigned VRegBR, VRegC; - - if (VT == MVT::f32) { - VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); - VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); - } else { - VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - } - // TODO: make sure we're feeding FPInterp the right arguments - // Right now: fi B, frest(B) - - // Computes BRcpl = - // (Floating Interpolate (FP Reciprocal Estimate B)) - SDValue BRcpl = - DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, - DAG.getNode(SPUISD::FPInterp, VT, B, - DAG.getNode(SPUISD::FPRecipEst, VT, B))); - - // Computes A * BRcpl and stores in a temporary register - SDValue AxBRcpl = - DAG.getCopyToReg(BRcpl, VRegC, - DAG.getNode(ISD::FMUL, VT, A, - DAG.getCopyFromReg(BRcpl, VRegBR, VT))); - // What's the Chain variable do? It's magic! - // TODO: set Chain = Op(0).getEntryNode() - - return DAG.getNode(ISD::FADD, VT, - DAG.getCopyFromReg(AxBRcpl, VRegC, VT), - DAG.getNode(ISD::FMUL, VT, - DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), - DAG.getNode(ISD::FSUB, VT, A, - DAG.getNode(ISD::FMUL, VT, B, - DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); -} - static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); SDValue N = Op.getOperand(0); SDValue Elt = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); SDValue retval; if (ConstantSDNode *C = dyn_cast(Elt)) { @@ -2067,17 +1927,17 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // sanity checks: if (VT == MVT::i8 && EltNo >= 16) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); else if (VT == MVT::i16 && EltNo >= 8) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); else if (VT == MVT::i32 && EltNo >= 4) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); else if (VT == MVT::i64 && EltNo >= 2) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { // i32 and i64: Element 0 is the preferred slot - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N); + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); } // Need to generate shuffle mask and extract: @@ -2128,7 +1988,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue ShufMask[4]; for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { - unsigned bidx = i / 4; + unsigned bidx = i * 4; unsigned int bits = ((ShufBytes[bidx] << 24) | (ShufBytes[bidx+1] << 16) | (ShufBytes[bidx+2] << 8) | @@ -2136,25 +1996,25 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { ShufMask[i] = DAG.getConstant(bits, MVT::i32); } - SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufMask[0], - sizeof(ShufMask) / 
sizeof(ShufMask[0])); + SDValue ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(SPUISD::SHUFB, N.getValueType(), + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), N, N, ShufMaskVec)); } else { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector MVT VecVT = N.getValueType(); if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { - cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n"; - abort(); + llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + "vector type!"); } // Make life easier by making sure the index is zero-extended to i32 if (Elt.getValueType() != MVT::i32) - Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt); + Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); // Scale the index to a bit/byte shift quantity APInt scaleFactor = @@ -2164,11 +2024,11 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { if (scaleShift > 0) { // Scale the shift factor: - Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt, - DAG.getConstant(scaleShift, MVT::i32)); + Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, + DAG.getConstant(scaleShift, MVT::i32)); } - vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt); + vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt); // Replicate the bytes starting at byte 0 across the entire vector (for // consistency with the notion of a unified register set) @@ -2176,40 +2036,41 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { switch (VT.getSimpleVT()) { default: - cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n"; - abort(); + llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + "type"); /*NOTREACHED*/ case MVT::i8: { SDValue factor = DAG.getConstant(0x00000000, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i16: { SDValue factor = DAG.getConstant(0x00010001, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i32: case MVT::f32: { SDValue factor = DAG.getConstant(0x00010203, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, - factor, factor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); break; } case MVT::i64: case MVT::f64: { SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor, - loFactor, hiFactor); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + loFactor, hiFactor, loFactor, hiFactor); break; } } - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate)); + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, VecVT, + vecShift, vecShift, replicate)); } return retval; @@ -2219,326 +2080,126 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue VecOp = Op.getOperand(0); SDValue 
ValOp = Op.getOperand(1); SDValue IdxOp = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); MVT VT = Op.getValueType(); ConstantSDNode *CN = cast(IdxOp); assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Use $2 because it's always 16-byte aligned and it's available: - SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT); + // Use $sp ($1) because it's always 16-byte aligned and it's available: + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(CN->getSExtValue(), PtrVT)); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); SDValue result = - DAG.getNode(SPUISD::SHUFB, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), + DAG.getNode(SPUISD::SHUFB, dl, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), VecOp, - DAG.getNode(SPUISD::SHUFFLE_MASK, VT, - DAG.getNode(ISD::ADD, PtrVT, - PtrBase, - DAG.getConstant(CN->getZExtValue(), - PtrVT)))); + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); return result; } -static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) +static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, + const TargetLowering &TLI) { SDValue N0 = Op.getOperand(0); // Everything has at least one operand + DebugLoc dl = Op.getDebugLoc(); + MVT ShiftVT = TLI.getShiftAmountTy(); assert(Op.getValueType() == MVT::i8); switch (Opc) { default: - assert(0 && "Unhandled i8 math operator"); + llvm_unreachable("Unhandled i8 math operator"); /*NOTREACHED*/ break; + case ISD::ADD: { + // 8-bit addition: Promote the arguments up to 16-bits and truncate + // the result: + SDValue N1 = Op.getOperand(1); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + + } + case ISD::SUB: { // 8-bit subtraction: Promote the arguments up to 16-bits and truncate // the result: SDValue N1 = Op.getOperand(1); - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } case ISD::ROTR: case ISD::ROTL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i32) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i32, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i32)); + MVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) + ? 
ISD::ZERO_EXTEND + : ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + // Replicate lower 8-bits into upper 8: SDValue ExpandArg = - DAG.getNode(ISD::OR, MVT::i16, N0, - DAG.getNode(ISD::SHL, MVT::i16, + DAG.getNode(ISD::OR, dl, MVT::i16, N0, + DAG.getNode(ISD::SHL, dl, MVT::i16, N0, DAG.getConstant(8, MVT::i32))); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, ExpandArg, N1)); + + // Truncate back down to i8 + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); } case ISD::SRL: case ISD::SHL: { SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - } - case ISD::SRA: { - SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) - ? ISD::SIGN_EXTEND - : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - } - case ISD::MUL: { - SDValue N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE; - N1 = (N1.getOpcode() != ISD::Constant - ? 
DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getZExtValue(), - MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - break; - } - } + MVT N1VT = N1.getValueType(); - return SDValue(); -} + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::ZERO_EXTEND; -static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) -{ - MVT VT = Op.getValueType(); - MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); - - SDValue Op0 = Op.getOperand(0); + if (N1.getValueType().bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; - switch (Opc) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ANY_EXTEND: { - MVT Op0VT = Op0.getValueType(); - MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); - - assert(Op0VT == MVT::i32 - && "CellSPU: Zero/sign extending something other than i32"); - - DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n"); - - SDValue PromoteScalar = - DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0); - - if (Opc != ISD::SIGN_EXTEND) { - // Use a shuffle to zero extend the i32 to i64 directly: - SDValue shufMask = - DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x00010203, MVT::i32), - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x08090a0b, MVT::i32)); - SDValue zextShuffle = - DAG.getNode(SPUISD::SHUFB, Op0VecVT, - PromoteScalar, PromoteScalar, shufMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle)); - } else { - // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift - // right and propagate the sign bit) instruction. - SDValue RotQuad = - DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT, - PromoteScalar, DAG.getConstant(4, MVT::i32)); - SDValue SignQuad = - DAG.getNode(SPUISD::VEC_SRA, Op0VecVT, - PromoteScalar, DAG.getConstant(32, MVT::i32)); - SDValue SelMask = - DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT, - DAG.getConstant(0xf0f0, MVT::i16)); - SDValue CombineQuad = - DAG.getNode(SPUISD::SELB, Op0VecVT, - SignQuad, RotQuad, SelMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad)); + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); } - } - case ISD::ADD: { - // Turn operands into vectors to satisfy type checking (shufb works on - // vectors) - SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); - SDValue Op1 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1)); - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. 
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - - SDValue CarryGen = - DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedCarry = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - CarryGen, CarryGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedCarry)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } + case ISD::SRA: { + SDValue N1 = Op.getOperand(1); + MVT N1VT = N1.getValueType(); - case ISD::SUB: { - // Turn operands into vectors to satisfy type checking (shufb works on - // vectors) - SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); - SDValue Op1 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1)); - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - SDValue BorrowGen = - DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedBorrow = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - BorrowGen, BorrowGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::SIGN_EXTEND; - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedBorrow)); - } + if (N1VT.bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } - case ISD::SHL: { - SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); - SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0); - SDValue MaskLower = - DAG.getNode(SPUISD::SELB, VecVT, - Op0Vec, - DAG.getConstant(0, VecVT), - DAG.getNode(SPUISD::SELECT_MASK, VecVT, - DAG.getConstant(0xff00ULL, MVT::i16))); - SDValue ShiftAmtBytes = - DAG.getNode(ISD::SRL, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(3, ShiftAmtVT)); - SDValue ShiftAmtBits = - DAG.getNode(ISD::AND, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(7, ShiftAmtVT)); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, - DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, - MaskLower, ShiftAmtBytes), - ShiftAmtBits)); - } - - case ISD::SRL: { - MVT VT = Op.getValueType(); - SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); - SDValue ShiftAmtBytes = - DAG.getNode(ISD::SRL, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(3, ShiftAmtVT)); - SDValue ShiftAmtBits = - DAG.getNode(ISD::AND, ShiftAmtVT, - ShiftAmt, - DAG.getConstant(7, ShiftAmtVT)); - - return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT, - DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT, - Op0, ShiftAmtBytes), - ShiftAmtBits); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } + case ISD::MUL: { + SDValue N1 = Op.getOperand(1); - case ISD::SRA: { - // Promote Op0 to vector - SDValue Op0 = - 
DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); - SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftVT = ShiftAmt.getValueType(); - - // Negate variable shift amounts - if (!isa(ShiftAmt)) { - ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT, - DAG.getConstant(0, ShiftVT), ShiftAmt); - } - - SDValue UpperHalfSign = - DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, - Op0, DAG.getConstant(31, MVT::i32)))); - SDValue UpperHalfSignMask = - DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign); - SDValue UpperLowerMask = - DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, - DAG.getConstant(0xff00, MVT::i16)); - SDValue UpperLowerSelect = - DAG.getNode(SPUISD::SELB, MVT::v2i64, - UpperHalfSignMask, Op0, UpperLowerMask); - SDValue RotateLeftBytes = - DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64, - UpperLowerSelect, ShiftAmt); - SDValue RotateLeftBits = - DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, - RotateLeftBytes, ShiftAmt); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - RotateLeftBits); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + break; } } @@ -2551,6 +2212,7 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { SDValue ConstVec; SDValue Arg; MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); ConstVec = Op.getOperand(0); Arg = Op.getOperand(1); @@ -2567,58 +2229,32 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { } if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - - if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits) - && isConstantSplat(VectorBits, UndefBits, - VT.getVectorElementType().getSizeInBits(), - SplatBits, SplatUndef, SplatSize)) { - SDValue tcVec[16]; + BuildVectorSDNode *BCN = dyn_cast(ConstVec.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); + + if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + && minSplatBits <= SplatBitSize) { + uint64_t SplatBits = APSplatBits.getZExtValue(); SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]); - // Turn the BUILD_VECTOR into a set of target constants: - for (size_t i = 0; i < tcVecSize; ++i) - tcVec[i] = tc; - - return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg, - DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize)); + SmallVector tcVec; + tcVec.assign(16, tc); + return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); } } + // These operations (AND, OR, XOR) are legal, they just couldn't be custom // lowered. Return the operation, rather than a null SDValue. return Op; } -//! 
Lower i32 multiplication -static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT, - unsigned Opc) { - switch (VT.getSimpleVT()) { - default: - cerr << "CellSPU: Unknown LowerMUL value type, got " - << Op.getValueType().getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - - case MVT::i32: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - - return DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB), - DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)), - DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB)); - } - } - - return SDValue(); -} - //! Custom lowering for CTPOP (count population) /*! Custom lowering code that counts the number ones in the input @@ -2628,6 +2264,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT, static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); + DebugLoc dl = Op.getDebugLoc(); switch (VT.getSimpleVT()) { default: @@ -2636,10 +2273,10 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue N = Op.getOperand(0); SDValue Elt0 = DAG.getConstant(0, MVT::i32); - SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); } case MVT::i16: { @@ -2653,22 +2290,22 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); SDValue Shift1 = DAG.getConstant(8, MVT::i32); - SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); // CNTB_result becomes the chain to which all of the virtual registers // CNTB_reg, SUM1_reg become associated: SDValue CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0); + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); SDValue CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16); + SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); - return DAG.getNode(ISD::AND, MVT::i16, - DAG.getNode(ISD::ADD, MVT::i16, - DAG.getNode(ISD::SRL, MVT::i16, + return DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::ADD, dl, MVT::i16, + DAG.getNode(ISD::SRL, dl, MVT::i16, Tmp1, Shift1), Tmp1), Mask0); @@ -2687,37 +2324,38 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue Shift1 = DAG.getConstant(16, MVT::i32); SDValue Shift2 = DAG.getConstant(8, MVT::i32); - SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); // CNTB_result becomes the chain to which all of the virtual registers // CNTB_reg, SUM1_reg become associated: SDValue CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0); + 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
     SDValue CNTB_rescopy =
-      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
     SDValue Comp1 =
-      DAG.getNode(ISD::SRL, MVT::i32,
-                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+                  Shift1);
     SDValue Sum1 =
-      DAG.getNode(ISD::ADD, MVT::i32,
-                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
     SDValue Sum1_rescopy =
-      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
+      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
     SDValue Comp2 =
-      DAG.getNode(ISD::SRL, MVT::i32,
-                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                   Shift2);
     SDValue Sum2 =
-      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
-                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
-    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
+    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
   }
   case MVT::i64:
@@ -2727,6 +2365,182 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  MVT OpVT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType();
+
+  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+      || OpVT == MVT::i64) {
+    // Convert f32 / f64 to i32 / i64 via libcall.
+    RTLIB::Libcall LC =
+      (Op.getOpcode() == ISD::FP_TO_SINT)
+       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+       : RTLIB::getFPTOUINT(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+    SDValue Dummy;
+    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+  }
+
+  return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  MVT OpVT = Op.getValueType();
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType();
+
+  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+      || Op0VT == MVT::i64) {
+    // Convert i32, i64 to f64 via libcall:
+    RTLIB::Libcall LC =
+      (Op.getOpcode() == ISD::SINT_TO_FP)
+       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+       : RTLIB::getUINTTOFP(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+    SDValue Dummy;
+    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+  }
+
+  return Op;
+}
+
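A quick sketch of the RTLIB mapping both helpers above rely on; the symbol name is the one conventionally used by libgcc/compiler-rt for this conversion, shown only for orientation:

    RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
    // LC == RTLIB::FPTOSINT_F64_I32, whose default libcall name is
    // "__fixdfsi"; ExpandLibCall then emits an ordinary SPU call to it.

+//! Lower ISD::SETCC
+/*!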
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+  DebugLoc dl = Op.getDebugLoc();
+  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  MVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+  MVT IntVT(MVT::i64);
+
+  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+  // selected to a NOP:
+  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+  SDValue lhsHi32 =
+          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                      DAG.getNode(ISD::SRL, dl, IntVT,
+                                  i64lhs, DAG.getConstant(32, MVT::i32)));
+  SDValue lhsHi32abs =
+          DAG.getNode(ISD::AND, dl, MVT::i32,
+                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+  SDValue lhsLo32 =
+          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+  // SETO and SETUO only use the lhs operand:
+  if (CC->get() == ISD::SETO) {
+    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+    // SETUO
+    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+    return DAG.getNode(ISD::XOR, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhs, DAG.getConstantFP(0.0, lhsVT),
+                                    ISD::SETUO),
+                       DAG.getConstant(ccResultAllOnes, ccResultVT));
+  } else if (CC->get() == ISD::SETUO) {
+    // Evaluates to true if Op0 is [SQ]NaN
+    return DAG.getNode(ISD::AND, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhsHi32abs,
+                                    DAG.getConstant(0x7ff00000, MVT::i32),
+                                    ISD::SETGE),
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhsLo32,
+                                    DAG.getConstant(0, MVT::i32),
+                                    ISD::SETGT));
+  }
+
+  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+  SDValue rhsHi32 =
+          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                      DAG.getNode(ISD::SRL, dl, IntVT,
+                                  i64rhs, DAG.getConstant(32, MVT::i32)));
+
+  // If a value is negative, subtract from the sign magnitude constant:
+  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+  // Convert the sign-magnitude representation into 2's complement:
+  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      lhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+  SDValue lhsSelect =
+          DAG.getNode(ISD::SELECT, dl, IntVT,
+                      lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      rhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+  SDValue rhsSelect =
+          DAG.getNode(ISD::SELECT, dl, IntVT,
+                      rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+  unsigned compareOp;
+
+  switch (CC->get()) {
+  case ISD::SETOEQ:
+  case ISD::SETUEQ:
+    compareOp = ISD::SETEQ; break;
+  case ISD::SETOGT:
+  case ISD::SETUGT:
+    compareOp = ISD::SETGT; break;
+  case ISD::SETOGE:
+  case ISD::SETUGE:
+    compareOp = ISD::SETGE; break;
+  case ISD::SETOLT:
+  case ISD::SETULT:
+    compareOp = ISD::SETLT; break;
+  case ISD::SETOLE:
+  case ISD::SETULE:
+    compareOp = ISD::SETLE; break;
+  case ISD::SETUNE:
+  case ISD::SETONE:
+    compareOp = ISD::SETNE; break;
+  default:
+    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
+  }
+
+  SDValue result =
+          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+
 //! Lower ISD::SELECT_CC
 /*!
   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@@ -2739,20 +2553,64 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   assumption, given the simplistic uses so far.
  */
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
   MVT VT = Op.getValueType();
   SDValue lhs = Op.getOperand(0);
   SDValue rhs = Op.getOperand(1);
   SDValue trueval = Op.getOperand(2);
   SDValue falseval = Op.getOperand(3);
   SDValue condition = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // NOTE: SELB's arguments: $rA, $rB, $mask
+  //
+  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+  // where bits in $mask are 1. CCond will be inverted, having 1s where the
+  // condition was true and 0s where the condition was false. Hence, the
+  // arguments to SELB get reversed.
 
   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
   // with another "cannot select select_cc" assert:
-  SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
-  return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
+  SDValue compare = DAG.getNode(ISD::SETCC, dl,
+                                TLI.getSetCCResultType(Op.getValueType()),
+                                lhs, rhs, condition);
+  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+  // Type to truncate to
+  MVT VT = Op.getValueType();
+  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
+  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to truncate from
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType();
+
+  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+    // Create shuffle mask, least significant doubleword of quadword
+    unsigned maskHigh = 0x08090a0b;
+    unsigned maskLow = 0x0c0d0e0f;
+    // Use a shuffle to perform the truncation
+    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                   DAG.getConstant(maskHigh, MVT::i32),
+                                   DAG.getConstant(maskLow, MVT::i32),
+                                   DAG.getConstant(maskHigh, MVT::i32),
+                                   DAG.getConstant(maskLow, MVT::i32));
+
+    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+                                       Op0, Op0, shufMask);
+
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+  }
+
+  return SDValue();             // Leave the truncate unmolested
+}
+
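The magic numbers in LowerTRUNCATE follow from the SPU's big-endian 16-byte registers: an i128's least significant doubleword sits in bytes 8..15, and each selector byte of a SHUFB mask names the source byte to copy into that result position (the mask repeats so both doublewords of the shuffled quadword hold the same value, and VEC2PREFSLOT then reads the preferred slot). A sketch of the byte gather, an editor's illustration under a simplified in-range-selectors-only model of SHUFB, not part of this patch:

#include <cassert>
#include <cstdint>

int main() {
  // Big-endian bytes of a sample i128: byte 0 is most significant.
  uint8_t quad[16];
  for (int i = 0; i != 16; ++i)
    quad[i] = static_cast<uint8_t>(i * 0x11);   // 0x00, 0x11, ..., 0xff

  // The two mask words from LowerTRUNCATE, spelled out as byte selectors:
  const uint8_t mask[8] = { 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };

  uint64_t result = 0;
  for (int i = 0; i != 8; ++i)
    result = (result << 8) | quad[mask[i]];     // gather bytes 8..15

  // Same value as reading bytes 8..15 directly as a big-endian u64:
  uint64_t expect = 0;
  for (int i = 8; i != 16; ++i)
    expect = (expect << 8) | quad[i];

  assert(result == expect);
  return 0;
}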
 //! Custom (target-specific) lowering entry point
@@ -2768,13 +2626,16 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   switch (Opc) {
   default: {
+#ifndef NDEBUG
     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
     cerr << "Op.getOpcode() = " << Opc << "\n";
     cerr << "*Op.getNode():\n";
     Op.getNode()->dump();
-    abort();
+#endif
+    llvm_unreachable(0);
   }
   case ISD::LOAD:
+  case ISD::EXTLOAD:
   case ISD::SEXTLOAD:
   case ISD::ZEXTLOAD:
     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
@@ -2786,12 +2647,8 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
   case ISD::JumpTable:
     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
-  case ISD::Constant:
-    return LowerConstant(Op, DAG);
   case ISD::ConstantFP:
     return LowerConstantFP(Op, DAG);
-  case ISD::BRCOND:
-    return LowerBRCOND(Op, DAG);
   case ISD::FORMAL_ARGUMENTS:
     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
   case ISD::CALL:
@@ -2799,11 +2656,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   case ISD::RET:
     return LowerRET(Op, DAG, getTargetMachine());
 
-  // i8, i64 math ops:
-  case ISD::ZERO_EXTEND:
-  case ISD::SIGN_EXTEND:
-  case ISD::ANY_EXTEND:
   case ISD::ADD:
   case ISD::SUB:
   case ISD::ROTR:
@@ -2812,12 +2665,18 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   case ISD::SHL:
   case ISD::SRA: {
     if (VT == MVT::i8)
-      return LowerI8Math(Op, DAG, Opc);
-    else if (VT == MVT::i64)
-      return LowerI64Math(Op, DAG, Opc);
+      return LowerI8Math(Op, DAG, Opc, *this);
     break;
   }
 
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    return LowerFP_TO_INT(Op, DAG, *this);
+
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return LowerINT_TO_FP(Op, DAG, *this);
+
   // Vector-related lowering.
   case ISD::BUILD_VECTOR:
     return LowerBUILD_VECTOR(Op, DAG);
@@ -2838,29 +2697,20 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
 
   // Vector and i8 multiply:
   case ISD::MUL:
-    if (VT.isVector())
-      return LowerVectorMUL(Op, DAG);
-    else if (VT == MVT::i8)
-      return LowerI8Math(Op, DAG, Opc);
-    else
-      return LowerMUL(Op, DAG, VT, Opc);
-
-  case ISD::FDIV:
-    if (VT == MVT::f32 || VT == MVT::v4f32)
-      return LowerFDIVf32(Op, DAG);
-#if 0
-    // This is probably a libcall
-    else if (Op.getValueType() == MVT::f64)
-      return LowerFDIVf64(Op, DAG);
-#endif
-    else
-      assert(0 && "Calling FDIV on unsupported MVT");
+    if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc, *this);
 
   case ISD::CTPOP:
     return LowerCTPOP(Op, DAG);
 
   case ISD::SELECT_CC:
-    return LowerSELECT_CC(Op, DAG);
+    return LowerSELECT_CC(Op, DAG, *this);
+
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG, *this);
+
+  case ISD::TRUNCATE:
+    return LowerTRUNCATE(Op, DAG);
   }
 
   return SDValue();
@@ -2901,53 +2751,64 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
 #endif
   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
-  SDValue Result;                       // Initially, NULL result
+  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
+  MVT NodeVT = N->getValueType(0);      // The node's value type
+  MVT Op0VT = Op0.getValueType();       // The first operand's result type
+  SDValue Result;                       // Initially, empty result
+  DebugLoc dl = N->getDebugLoc();
 
   switch (N->getOpcode()) {
   default: break;
   case ISD::ADD: {
     SDValue Op1 = N->getOperand(1);
 
-    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
-      SDValue Op01 = Op0.getOperand(1);
-      if (Op01.getOpcode() == ISD::Constant
-          || Op01.getOpcode() == ISD::TargetConstant) {
-        // (add <const>, (SPUindirect <arg>, <const>)) ->
-        // (SPUindirect <arg>, <const + const>)
-        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
-        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
-        SDValue combinedConst =
-          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
-                          Op0.getValueType());
-
-        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
-                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
-        DEBUG(cerr << "With:    (SPUindirect <arg>, "
-                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-        return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
-                           Op0.getOperand(0), combinedConst);
+    if (Op0.getOpcode() == SPUISD::IndirectAddr
+        || Op1.getOpcode() == SPUISD::IndirectAddr) {
+      // Normalize the operands to reduce repeated code
+      SDValue IndirectArg = Op0, AddArg = Op1;
+
+      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+        IndirectArg = Op1;
+        AddArg = Op0;
       }
-    } else if (isa<ConstantSDNode>(Op0)
-               && Op1.getOpcode() == SPUISD::IndirectAddr) {
-      SDValue Op11 = Op1.getOperand(1);
-      if (Op11.getOpcode() == ISD::Constant
-          || Op11.getOpcode() == ISD::TargetConstant) {
-        // (add (SPUindirect <arg>, <const>), <const>) ->
-        // (SPUindirect <arg>, <const + const>)
-        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
-        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
-        SDValue combinedConst =
-          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
-                          Op0.getValueType());
-
-        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
-                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
-        DEBUG(cerr << "With:    (SPUindirect <arg>, "
-                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-
-        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
-                           Op1.getOperand(0), combinedConst);
+
+      if (isa<ConstantSDNode>(AddArg)) {
+        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+        SDValue IndOp1 = IndirectArg.getOperand(1);
+
+        if (CN0->isNullValue()) {
+          // (add (SPUindirect <arg>, <val>), 0) ->
+          // (SPUindirect <arg>, <val>)
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            cerr << "\n"
+                 << "Replace: (add (SPUindirect <arg>, <val>), 0)\n"
+                 << "With:    (SPUindirect <arg>, <val>)\n";
+          }
+#endif
+
+          return IndirectArg;
+        } else if (isa<ConstantSDNode>(IndOp1)) {
+          // (add (SPUindirect <arg>, <val>), <const>) ->
+          // (SPUindirect <arg>, <val + const>)
+          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            cerr << "\n"
+                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+                 << "), " << CN0->getSExtValue() << ")\n"
+                 << "With:    (SPUindirect <arg>, "
+                 << combinedConst << ")\n";
+          }
+#endif
+
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+                             IndirectArg, combinedValue);
+        }
+      }
     }
     break;
   }
@@ -2955,16 +2816,19 @@
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: {
-    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
-        N->getValueType(0) == Op0.getValueType()) {
+    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
       // (any_extend (SPUextract_elt0 <arg>)) ->
       // (SPUextract_elt0 <arg>)
       // Types must match, however...
-      DEBUG(cerr << "Replace: ");
-      DEBUG(N->dump(&DAG));
-      DEBUG(cerr << "\nWith:    ");
-      DEBUG(Op0.getNode()->dump(&DAG));
-      DEBUG(cerr << "\n");
+#if !defined(NDEBUG)
+      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+        cerr << "\nReplace: ";
+        N->dump(&DAG);
+        cerr << "\nWith:    ";
+        Op0.getNode()->dump(&DAG);
+        cerr << "\n";
+      }
+#endif
 
       return Op0;
     }
     break;
   }
   case SPUISD::IndirectAddr: {
     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
-      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
-      if (CN->getZExtValue() == 0) {
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+      if (CN != 0 && CN->getZExtValue() == 0) {
         // (SPUindirect (SPUaform <addr>, 0), 0) ->
         // (SPUaform <addr>, 0)
@@ -2985,6 +2849,25 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
 
         return Op0;
       }
+    } else if (Op0.getOpcode() == ISD::ADD) {
+      SDValue Op1 = N->getOperand(1);
+      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+        // (SPUindirect (add <arg>, <arg>), 0) ->
+        // (SPUindirect <arg>, <arg>)
+        if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+            cerr << "\n"
+                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+                 << "With:    (SPUindirect <arg>, <arg>)\n";
+          }
+#endif
+
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+                             Op0.getOperand(0), Op0.getOperand(1));
+        }
+      }
     }
     break;
   }
@@ -2993,41 +2876,38 @@
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
   case SPUISD::VEC_SHL:
   case SPUISD::VEC_SRL:
   case SPUISD::VEC_SRA:
-  case SPUISD::ROTQUAD_RZ_BYTES:
-  case SPUISD::ROTQUAD_RZ_BITS: {
+  case SPUISD::ROTBYTES_LEFT: {
     SDValue Op1 = N->getOperand(1);
 
-    if (isa<ConstantSDNode>(Op1)) {
-      // Kill degenerate vector shifts:
-      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-
-      if (CN->getZExtValue() == 0) {
+    // Kill degenerate vector shifts:
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
-  case SPUISD::PROMOTE_SCALAR: {
+  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
-      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
+      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
       // <arg>
-      // but only if the SPUpromote_scalar and <arg> types match.
+      // but only if the SPUprefslot2vec and <arg> types match.
       SDValue Op00 = Op0.getOperand(0);
       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
         SDValue Op000 = Op00.getOperand(0);
-        if (Op000.getValueType() == N->getValueType(0)) {
+        if (Op000.getValueType() == NodeVT) {
           Result = Op000;
         }
       }
       break;
     }
     case SPUISD::VEC2PREFSLOT: {
-      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
+      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
       // <arg>
       Result = Op0.getOperand(0);
       break;
@@ -3036,8 +2916,9 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     break;
   }
   }
+
   // Otherwise, return unchanged.
-#ifdef NDEBUG
+#ifndef NDEBUG
   if (Result.getNode()) {
     DEBUG(cerr << "\nReplace.SPU: ");
     DEBUG(N->dump(&DAG));
@@ -3107,51 +2988,19 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth ) const {
 #if 0
-  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
-#endif
+  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
 
   switch (Op.getOpcode()) {
   default:
     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
     break;
-
-#if 0
   case CALL:
   case SHUFB:
   case SHUFFLE_MASK:
   case CNTB:
-#endif
-
-  case SPUISD::PROMOTE_SCALAR: {
-    SDValue Op0 = Op.getOperand(0);
-    MVT Op0VT = Op0.getValueType();
-    unsigned Op0VTBits = Op0VT.getSizeInBits();
-    uint64_t InMask = Op0VT.getIntegerVTBitMask();
-    KnownZero |= APInt(Op0VTBits, ~InMask, false);
-    KnownOne |= APInt(Op0VTBits, InMask, false);
-    break;
-  }
-
+  case SPUISD::PREFSLOT2VEC:
   case SPUISD::LDRESULT:
   case SPUISD::VEC2PREFSLOT:
-  case SPUISD::VEC2PREFSLOT_CHAINED: {
-    MVT OpVT = Op.getValueType();
-    unsigned OpVTBits = OpVT.getSizeInBits();
-    uint64_t InMask = OpVT.getIntegerVTBitMask();
-    KnownZero |= APInt(OpVTBits, ~InMask, false);
-    KnownOne |= APInt(OpVTBits, InMask, false);
-    break;
-  }
-
-#if 0
-  case EXTRACT_I1_ZEXT:
-  case EXTRACT_I1_SEXT:
-  case EXTRACT_I8_ZEXT:
-  case EXTRACT_I8_SEXT:
-  case MPY:
-  case MPYU:
-  case MPYH:
-  case MPYHH:
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
   case SPUISD::VEC_SHL:
@@ -3159,16 +3008,28 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   case SPUISD::VEC_SRL:
   case SPUISD::VEC_SRA:
   case SPUISD::VEC_ROTL:
   case SPUISD::VEC_ROTR:
-  case SPUISD::ROTQUAD_RZ_BYTES:
-  case SPUISD::ROTQUAD_RZ_BITS:
   case SPUISD::ROTBYTES_LEFT:
-  case SPUISD::ROTBYTES_LEFT_CHAINED:
   case SPUISD::SELECT_MASK:
   case SPUISD::SELB:
-  case SPUISD::FPInterp:
-  case SPUISD::FPRecipEst:
-  case SPUISD::SEXT32TO64:
+  }
 #endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                   unsigned Depth) const {
+  switch (Op.getOpcode()) {
+  default:
+    return 1;
+
+  case ISD::SETCC: {
+    MVT VT = Op.getValueType();
+
+    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+      VT = MVT::i32;
+    }
+    return VT.getSizeInBits();
+  }
+  }
+}
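ComputeNumSignBitsForTargetNode reports VT.getSizeInBits() sign bits for a SETCC because the lowered comparisons produce all-zeros or all-ones masks, so every bit is a copy of the sign bit. A small standalone model of the quantity being tracked (editor's sketch, not part of this patch):

#include <cassert>
#include <cstdint>

// Count how many high-order bits are copies of the sign bit (always >= 1);
// this is the quantity ComputeNumSignBits propagates through the DAG.
static unsigned numSignBits(int32_t v) {
  uint32_t u = static_cast<uint32_t>(v);
  uint32_t sign = u >> 31;
  unsigned n = 1;
  while (n < 32 && ((u >> (31 - n)) & 1) == sign)
    ++n;
  return n;
}

int main() {
  int32_t ctrue = -1, cfalse = 0;      // mask-style i32 setcc results
  assert(numSignBits(ctrue) == 32);    // all bits replicate the sign bit
  assert(numSignBits(cfalse) == 32);
  assert(numSignBits(5) == 29);        // ordinary values report fewer
  return 0;
}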