From: Scott Michel
Date: Thu, 17 Jan 2008 20:38:41 +0000 (+0000)
Subject: Forward progress: crtbegin.c now compiles successfully!
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=58c5818c01e375a84dc601140470fa68638004cf;p=oota-llvm.git

Forward progress: crtbegin.c now compiles successfully!

Fixed CellSPU's A-form (local store) address mode, so that all globals,
externals, constant pool and jump table symbols are now wrapped within a
SPUISD::AFormAddr pseudo-instruction. This now identifies all local store
memory addresses, although it requires a bit of legerdemain during
instruction selection to select loads from and stores to local store
correctly, generating "LQA" instructions.

Also added the mul_ops.ll test harness for exercising integer multiplication.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46142 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 167d3c3c20a..3a50e3bcf04 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -159,16 +159,38 @@ namespace {
     int prefslot_byte;          /// Byte offset of the "preferred" slot
     unsigned brcc_eq_ins;       /// br_cc equal instruction
     unsigned brcc_neq_ins;      /// br_cc not equal instruction
+    unsigned load_aform;        /// A-form load instruction for this VT
+    unsigned store_aform;       /// A-form store instruction for this VT
   };
 
   const valtype_map_s valtype_map[] = {
-    { MVT::i1,    0,            3, 0,         0 },
-    { MVT::i8,    0,            3, 0,         0 },
-    { MVT::i16,   SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ },
-    { MVT::i32,   SPU::ORIr32,  0, SPU::BRZ,  SPU::BRNZ },
-    { MVT::i64,   SPU::ORIr64,  0, 0,         0 },
-    { MVT::f32,   0,            0, 0,         0 },
-    { MVT::f64,   0,            0, 0,         0 }
+    { MVT::i1,    0,            3, 0,         0,          0,
+      0 },
+    { MVT::i8,    SPU::ORBIr8,  3, 0,         0,          SPU::LQAr8,
+      SPU::STQAr8 },
+    { MVT::i16,   SPU::ORHIr16, 2, SPU::BRHZ, SPU::BRHNZ, SPU::LQAr16,
+      SPU::STQAr16 },
+    { MVT::i32,   SPU::ORIr32,  0, SPU::BRZ,  SPU::BRNZ,  SPU::LQAr32,
+      SPU::STQAr32 },
+    { MVT::i64,   SPU::ORIr64,  0, 0,         0,          SPU::LQAr64,
+      SPU::STQAr64 },
+    { MVT::f32,   0,            0, 0,         0,          SPU::LQAf32,
+      SPU::STQAf32 },
+    { MVT::f64,   0,            0, 0,         0,          SPU::LQAf64,
+      SPU::STQAf64 },
+    // vector types... (sigh!)
+    { MVT::v16i8, 0,            0, 0,         0,          SPU::LQAv16i8,
+      SPU::STQAv16i8 },
+    { MVT::v8i16, 0,            0, 0,         0,          SPU::LQAv8i16,
+      SPU::STQAv8i16 },
+    { MVT::v4i32, 0,            0, 0,         0,          SPU::LQAv4i32,
+      SPU::STQAv4i32 },
+    { MVT::v2i64, 0,            0, 0,         0,          SPU::LQAv2i64,
+      SPU::STQAv2i64 },
+    { MVT::v4f32, 0,            0, 0,         0,          SPU::LQAv4f32,
+      SPU::STQAv4f32 },
+    { MVT::v2f64, 0,            0, 0,         0,          SPU::LQAv2f64,
+      SPU::STQAv2f64 },
   };
 
   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
@@ -465,14 +487,6 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base,
     int32_t offset = (int32_t) CN->getSignExtended();
     unsigned Opc0 = Op0.getOpcode();
 
-    if ((offset & 0xf) != 0) {
-      // Unaligned offset: punt and let X-form address handle it.
-      // NOTE: This really doesn't have to be strictly 16-byte aligned,
-      // since the load/store quadword instructions will implicitly
-      // zero the lower 4 bits of the resulting address.
- return false; - } - if (Opc0 == ISD::FrameIndex) { FrameIndexSDNode *FI = dyn_cast(Op0); DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset @@ -506,7 +520,8 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, const SDOperand Op0 = N.getOperand(0); // Frame index/base const SDOperand Op1 = N.getOperand(1); // Offset within base - if (Op0.getOpcode() != SPUISD::XFormAddr) { + if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { ConstantSDNode *CN = cast(Op1); assert(CN != 0 && "SelectDFormAddr/SPUISD::DFormAddr expecting constant"); Base = CurDAG->getTargetConstant(CN->getValue(), PtrTy); @@ -523,6 +538,11 @@ SPUDAGToDAGISel::SelectDFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, Index = CurDAG->getTargetFrameIndex(FI->getIndex(), PtrTy); return true; } + } else if (Opc == SPUISD::LDRESULT) { + // It's a load result dereference + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N.getOperand(0); + return true; } return false; @@ -550,24 +570,9 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, if (Opc == ISD::ADD) { SDOperand N1 = N.getOperand(0); SDOperand N2 = N.getOperand(1); - unsigned N1Opc = N1.getOpcode(); - unsigned N2Opc = N2.getOpcode(); - - if ((N1Opc == SPUISD::Hi && N2Opc == SPUISD::Lo) - || (N1Opc == SPUISD::Lo && N2Opc == SPUISD::Hi) - || (N1Opc == SPUISD::XFormAddr)) { - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; - } else { - cerr << "SelectXFormAddr: Unhandled ADD operands:\n"; - N1.Val->dump(); - cerr << "\n"; - N2.Val->dump(); - cerr << "\n"; - abort(); - /*UNREACHED*/ - } + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; } else if (Opc == SPUISD::XFormAddr) { Base = N; Index = N.getOperand(1); @@ -608,6 +613,62 @@ SPUDAGToDAGISel::SelectXFormAddr(SDOperand Op, SDOperand N, SDOperand &Base, return false; } +//! Emit load for A-form addresses +/* + */ +SDNode * +Emit_LOAD_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel) +{ + SDNode *Result; + MVT::ValueType OpVT = Op.getValueType(); + SDOperand Chain = Op.getOperand(0); + SDOperand Ptr = Op.getOperand(1); + SDOperand PtrArg = Ptr.getOperand(0); + SDOperand PtrOffs = Ptr.getOperand(1); + const valtype_map_s *vtm = getValueTypeMapEntry(OpVT); + + if (PtrOffs.getOpcode() == ISD::Constant) { + ConstantSDNode *CN = cast(PtrOffs); + MVT::ValueType PVT = PtrOffs.getValueType(); + PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT); + } + ISel.AddToISelQueue(PtrArg); + ISel.AddToISelQueue(PtrOffs); + ISel.AddToISelQueue(Chain); + Result = CurDAG.getTargetNode(vtm->load_aform, OpVT, MVT::Other, PtrArg, PtrOffs, Chain); + Chain = SDOperand(Result, 1); + return Result; +} + +//! 
Emit store for A-form addresses +/* + */ +SDNode * +Emit_STORE_AFormAddr(SDOperand Op, SelectionDAG &CurDAG, SPUDAGToDAGISel &ISel) +{ + SDNode *Result; + SDOperand Chain = Op.getOperand(0); + SDOperand Val = Op.getOperand(1); + SDOperand Ptr = Op.getOperand(2); + SDOperand PtrArg = Ptr.getOperand(0); + SDOperand PtrOffs = Ptr.getOperand(1); + const valtype_map_s *vtm = getValueTypeMapEntry(Val.getValueType()); + + if (PtrOffs.getOpcode() == ISD::Constant) { + ConstantSDNode *CN = cast(PtrOffs); + MVT::ValueType PVT = PtrOffs.getValueType(); + PtrOffs = CurDAG.getTargetConstant(CN->getValue(), PVT); + } + ISel.AddToISelQueue(Val); + ISel.AddToISelQueue(PtrArg); + ISel.AddToISelQueue(PtrOffs); + ISel.AddToISelQueue(Chain); + SDOperand Ops[4] = { Val, PtrArg, PtrOffs, Chain }; + Result = CurDAG.getTargetNode(vtm->store_aform, MVT::Other, Ops, 4); + Chain = SDOperand(Result, 1); + return Result; +} + //! Convert the operand from a target-independent to a target-specific node /*! */ @@ -615,6 +676,10 @@ SDNode * SPUDAGToDAGISel::Select(SDOperand Op) { SDNode *N = Op.Val; unsigned Opc = N->getOpcode(); + int n_ops = -1; + unsigned NewOpc; + MVT::ValueType OpVT = Op.getValueType(); + SDOperand Ops[8]; if (Opc >= ISD::BUILTIN_OP_END && Opc < SPUISD::FIRST_NUMBER) { return NULL; // Already selected. @@ -626,9 +691,32 @@ SPUDAGToDAGISel::Select(SDOperand Op) { SDOperand TFI = CurDAG->getTargetFrameIndex(FI, PtrVT); DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AI32 , 0\n"); - if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, SPU::AIr32, Op.getValueType(), TFI, Zero); - CurDAG->getTargetNode(SPU::AIr32, Op.getValueType(), TFI, Zero); + NewOpc = SPU::AIr32; + Ops[0] = TFI; + Ops[1] = Zero; + n_ops = 2; + } else if (Opc == ISD::LOAD + && Op.getOperand(1).getOpcode() == SPUISD::AFormAddr) { + return Emit_LOAD_AFormAddr(Op, *CurDAG, *this); + } else if (Opc == ISD::STORE + && Op.getOperand(2).getOpcode() == SPUISD::AFormAddr) { + return Emit_STORE_AFormAddr(Op, *CurDAG, *this); + } else if (Opc == ISD::ZERO_EXTEND) { + // (zero_extend:i16 (and:i8 , )) + const SDOperand &Op1 = N->getOperand(0); + + if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) { + if (Op1.getOpcode() == ISD::AND) { + // Fold this into a single ANDHI. This is often seen in expansions of i1 + // to i8, then i8 to i16 in logical/branching operations. + DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 " + ", ))\n"); + NewOpc = SPU::ANDHI1To2; + Ops[0] = Op1.getOperand(0); + Ops[1] = Op1.getOperand(1); + n_ops = 2; + } + } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT unsigned VT = N->getValueType(0); @@ -650,20 +738,54 @@ SPUDAGToDAGISel::Select(SDOperand Op) { Opc = vtm->ldresult_ins; AddToISelQueue(Zero); - Result = CurDAG->SelectNodeTo(N, Opc, VT, MVT::Other, Arg, Zero, Chain); + Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Zero, Chain); } else { - Result = - CurDAG->SelectNodeTo(N, (VT == MVT::f32 ? SPU::ORf32 : SPU::ORf64), - MVT::Other, Arg, Arg, Chain); + Opc = (VT == MVT::f32 ? 
SPU::ORf32 : SPU::ORf64); + Result = CurDAG->getTargetNode(Opc, MVT::Other, Arg, Arg, Chain); } Chain = SDOperand(Result, 1); AddToISelQueue(Chain); return Result; + } else if (Opc == SPUISD::XFormAddr) { + SDOperand Op0 = Op.getOperand(0); + if (Op0.getOpcode() == SPUISD::LDRESULT + || Op0.getOpcode() == SPUISD::AFormAddr) { + // (XFormAddr (LDRESULT|AFormAddr, imm)) + SDOperand Op1 = Op.getOperand(1); + MVT::ValueType VT = Op.getValueType(); + + DEBUG(cerr << "CellSPU: XFormAddr(" + << (Op0.getOpcode() == SPUISD::LDRESULT + ? "LDRESULT" + : "AFormAddr") + << ", imm):\nOp0 = "); + DEBUG(Op.getOperand(0).Val->dump(CurDAG)); + DEBUG(cerr << "\nOp1 = "); + DEBUG(Op.getOperand(1).Val->dump(CurDAG)); + DEBUG(cerr << "\n"); + + if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *CN = cast(Op1); + Op1 = CurDAG->getTargetConstant(CN->getValue(), VT); + } + AddToISelQueue(Op0); + AddToISelQueue(Op1); + NewOpc = SPU::AIr32; + Ops[0] = Op0; + Ops[1] = Op1; + n_ops = 2; + } } - return SelectCode(Op); + if (n_ops > 0) { + if (N->hasOneUse()) + return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops); + else + return CurDAG->getTargetNode(NewOpc, OpVT, Ops, n_ops); + } else + return SelectCode(Op); } /// createPPCISelDag - This pass converts a legalized DAG into a diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e2a1b43d1a0..0f1d0452804 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -100,6 +100,14 @@ namespace { || Opc == ISD::TargetExternalSymbol || Opc == SPUISD::AFormAddr); } + + //! Predicate that returns true if the operand is an indirect target + bool isIndirectOperand(const SDOperand &Op) + { + const unsigned Opc = Op.getOpcode(); + return (Opc == ISD::Register + || Opc == SPUISD::LDRESULT); + } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) @@ -126,7 +134,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); // SPU has no sign or zero extended loads for i1, i8, i16: - setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom); + setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote); setTruncStoreAction(MVT::i8, MVT::i1, Custom); @@ -160,10 +168,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::STORE, sctype, Custom); } - // SPU supports BRCOND, although DAGCombine will convert BRCONDs - // into BR_CCs. BR_CC instructions are custom selected in - // SPUDAGToDAGISel. - setOperationAction(ISD::BRCOND, MVT::Other, Legal); + // Custom lower BRCOND for i1, i8 to "promote" the result to + // i32 and i16, respectively. 
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom); // Expand the jumptable branches setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -472,7 +479,7 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, SDOperand Op1 = basePtr.Val->getOperand(1); if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { - const ConstantSDNode *CN = cast(basePtr.Val->getOperand(1)); + const ConstantSDNode *CN = cast(basePtr.getOperand(1)); alignOffs = (int) CN->getValue(); prefSlotOffs = (int) (alignOffs & 0xf); @@ -482,15 +489,13 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, prefSlotOffs -= vtm->prefslot_byte; basePtr = basePtr.getOperand(0); - // Modify alignment, since the ADD is likely from getElementPtr: - switch (basePtr.getOpcode()) { - case ISD::GlobalAddress: - case ISD::TargetGlobalAddress: { - GlobalAddressSDNode *GN = cast(basePtr.Val); - const GlobalValue *GV = GN->getGlobal(); - alignment = GV->getAlignment(); - break; - } + // Loading from memory, can we adjust alignment? + if (basePtr.getOpcode() == SPUISD::AFormAddr) { + SDOperand APtr = basePtr.getOperand(0); + if (APtr.getOpcode() == ISD::TargetGlobalAddress) { + GlobalAddressSDNode *GSDN = cast(APtr); + alignment = GSDN->getGlobal()->getAlignment(); + } } } else { alignOffs = 0; @@ -504,15 +509,9 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, if (alignment == 16) { // Realign the base pointer as a D-Form address: if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { - if (isMemoryOperand(basePtr)) { - SDOperand Zero = DAG.getConstant(0, PtrVT); - unsigned Opc = (!ST->usingLargeMem() - ? SPUISD::AFormAddr - : SPUISD::XFormAddr); - basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero); - } - basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, - basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT)); + basePtr = DAG.getNode(ISD::ADD, PtrVT, + basePtr, + DAG.getConstant((alignOffs & ~0xf), PtrVT)); } // Emit the vector load: @@ -524,7 +523,7 @@ AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, // Unaligned load or we're using the "large memory" model, which means that // we have to be very pessimistic: - if (isMemoryOperand(basePtr)) { + if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); } @@ -551,13 +550,6 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { unsigned alignment = LN->getAlignment(); SDOperand Ops[8]; - // For an extending load of an i1 variable, just call it i8 (or whatever we - // were passed) and make it zero-extended: - if (VT == MVT::i1) { - VT = OpVT; - ExtType = ISD::ZEXTLOAD; - } - switch (LN->getAddressingMode()) { case ISD::UNINDEXED: { int offset, rotamt; @@ -575,15 +567,13 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (rotamt != 0 || !was16aligned) { SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); + Ops[0] = the_chain; + Ops[1] = result; if (was16aligned) { - Ops[0] = the_chain; - Ops[1] = result; Ops[2] = DAG.getConstant(rotamt, MVT::i16); } else { MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); LoadSDNode *LN1 = cast(result); - Ops[0] = the_chain; - Ops[1] = result; Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(), DAG.getConstant(rotamt, PtrVT)); } @@ -628,9 +618,14 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); - SDOperand retops[2] = { result, 
the_chain }; - - result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2); + SDOperand retops[3] = { + result, + the_chain, + DAG.getConstant(alignment, MVT::i32) + }; + + result = DAG.getNode(SPUISD::LDRESULT, retvts, + retops, sizeof(retops) / sizeof(retops[0])); return result; } case ISD::PRE_INC: @@ -712,6 +707,7 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DEBUG(cerr << "\n"); if (basePtr.getOpcode() == SPUISD::DFormAddr) { + // Hmmmm... do we ever actually hit this code? insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, basePtr.getOperand(0), insertEltOffs); @@ -720,6 +716,8 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) { insertEltPtr = basePtr; } else { + // $sp is always aligned, so use it instead of potentially loading an + // address into a new register: insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, DAG.getRegister(SPU::R1, PtrVT), insertEltOffs); @@ -766,10 +764,9 @@ LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { // Just return the SDOperand with the constant pool address in it. - return CPI; + return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero); } else { #if 1 - // Generate hi/lo address pair SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); @@ -795,7 +792,7 @@ LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (TM.getRelocationModel() == Reloc::Static) { return (!ST->usingLargeMem() - ? JTI + ? DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero) : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero)); } @@ -815,7 +812,7 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (TM.getRelocationModel() == Reloc::Static) { return (!ST->usingLargeMem() - ? GA + ? DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero) : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero)); } else { cerr << "LowerGlobalAddress: Relocation model other than static not " @@ -880,6 +877,24 @@ LowerConstantFP(SDOperand Op, SelectionDAG &DAG) { return SDOperand(); } +//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16) +static SDOperand +LowerBRCOND(SDOperand Op, SelectionDAG &DAG) +{ + SDOperand Cond = Op.getOperand(1); + MVT::ValueType CondVT = Cond.getValueType(); + MVT::ValueType CondNVT; + + if (CondVT == MVT::i1 || CondVT == MVT::i8) { + CondNVT = (CondVT == MVT::i1 ? 
MVT::i32 : MVT::i16); + return DAG.getNode(ISD::BRCOND, Op.getValueType(), + Op.getOperand(0), + DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)), + Op.getOperand(2)); + } else + return SDOperand(); // Unchanged +} + static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex) { @@ -2458,8 +2473,10 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) return LowerConstant(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG); + case ISD::BRCOND: + return LowerBRCOND(Op, DAG); case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); + return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); case ISD::CALL: return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::RET: @@ -2537,48 +2554,16 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const #if 0 TargetMachine &TM = getTargetMachine(); SelectionDAG &DAG = DCI.DAG; -#endif SDOperand N0 = N->getOperand(0); // everything has at least one operand switch (N->getOpcode()) { default: break; - - // Look for obvious optimizations for shift left: - // a) Replace 0 << V with 0 - // b) Replace V << 0 with V - // - // N.B: llvm will generate an undef node if the shift amount is greater than - // 15 (e.g.: V << 16), which will naturally trigger an assert. - case SPU::SHLIr32: - case SPU::SHLHIr16: - case SPU::SHLQBIIvec: - case SPU::ROTHIr16: - case SPU::ROTHIr16_i32: - case SPU::ROTIr32: - case SPU::ROTIr32_i16: - case SPU::ROTQBYIvec: - case SPU::ROTQBYBIvec: - case SPU::ROTQBIIvec: - case SPU::ROTHMIr16: - case SPU::ROTMIr32: - case SPU::ROTQMBYIvec: { - if (N0.getOpcode() == ISD::Constant) { - if (ConstantSDNode *C = cast(N0)) { - if (C->getValue() == 0) // 0 << V -> 0. - return N0; - } - } - SDOperand N1 = N->getOperand(1); - if (N1.getOpcode() == ISD::Constant) { - if (ConstantSDNode *C = cast(N1)) { - if (C->getValue() == 0) // V << 0 -> V - return N1; - } - } - break; - } + // Do something creative here for ISD nodes that can be coalesced in unique + // ways. } +#endif + // Otherwise, return unchanged. 
return SDOperand(); } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 94aa390fe9d..71cb37dc08d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1359,6 +1359,9 @@ def ORi8_v16i8: def : Pat<(SPUextract_elt0 (v16i8 VECREG:$rA)), (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; +def : Pat<(SPUextract_elt0_chained (v16i8 VECREG:$rA)), + (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; + def ORi16_v8i16: RRForm<0b10000010000, (outs R16C:$rT), (ins VECREG:$rA, VECREG:$rB), "or\t$rT, $rA, $rB", IntegerOp, @@ -2868,6 +2871,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { */ } +//===----------------------------------------------------------------------===// +// brcond predicates: +//===----------------------------------------------------------------------===// def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), (BRHZ R16C:$rA, bb:$dest)>; def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), @@ -2876,7 +2882,7 @@ def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), (BRZ R32C:$rA, bb:$dest)>; def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), - (BRZ R32C:$rA, bb:$dest)>; + (BRNZ R32C:$rA, bb:$dest)>; let isTerminator = 1, isBarrier = 1 in { let isReturn = 1 in { @@ -2885,23 +2891,6 @@ let isTerminator = 1, isBarrier = 1 in { } } -//===----------------------------------------------------------------------===// -// Various brcond predicates: -//===----------------------------------------------------------------------===// -/* -def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), - (BRZ R32C:$rA, bb:$dest)>; - -def : Pat<(brcond (i32 (seteq R32C:$rA, R32C:$rB)), bb:$dest), - (BRNZ (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; - -def : Pat<(brcond (i16 (seteq R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (BRHNZ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - -def : Pat<(brcond (i16 (seteq R16C:$rA, R16C:$rB)), bb:$dest), - (BRHNZ (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>; -*/ - //===----------------------------------------------------------------------===// // Single precision floating point instructions //===----------------------------------------------------------------------===// @@ -3475,21 +3464,20 @@ def : Pat<(i32 (anyext R16C:$rSrc)), // low parts in order to load them into a register. 
//===----------------------------------------------------------------------===// -def : Pat<(SPUhi tglobaladdr:$in, 0), (ILHUhi tglobaladdr:$in)>; -def : Pat<(SPUlo tglobaladdr:$in, 0), (ILAlo tglobaladdr:$in)>; def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; def : Pat<(SPUxform tglobaladdr:$in, 0), (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; -def : Pat<(SPUhi tjumptable:$in, 0), (ILHUhi tjumptable:$in)>; -def : Pat<(SPUlo tjumptable:$in, 0), (ILAlo tjumptable:$in)>; + def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; def : Pat<(SPUxform tjumptable:$in, 0), (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; -def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; -def : Pat<(SPUlo tconstpool:$in , 0), (ILAlo tconstpool:$in)>; -def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; -/* def : Pat<(SPUxform tconstpool:$in, 0), - (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; */ + +def : Pat<(SPUhi tconstpool:$in , 0), (ILHUhi tconstpool:$in)>; +def : Pat<(SPUlo tconstpool:$in , 0), (ILAlsa tconstpool:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; +// tblgen bug prevents this from working. +// def : Pat<(SPUxform tconstpool:$in, 0), +// (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; // Instrinsics: include "CellSDKIntrinsics.td" diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index b176fc98c7f..84601301c33 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -195,6 +195,10 @@ def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>; // X-Form "$reg($reg)" addresses def SPUxform : SDNode<"SPUISD::XFormAddr", SDTIntBinOp, []>; +// Load result node +def SPUload_result : SDTypeProfile<1, 3, []>; +def SPUldresult : SDNode<"SPUISD::LDRESULT", SPUload_result, [SDNPHasChain]>; + // SPU 32-bit sign-extension to 64-bits def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>; diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index 27157ca74ca..3c5810ef183 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -1,18 +1,18 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s -; RUN: grep bisl %t1.s | count 6 && +; RUN: grep bisl %t1.s | count 7 && ; RUN: grep ila %t1.s | count 1 && ; RUN: grep rotqbyi %t1.s | count 4 && -; RUN: grep lqa %t1.s | count 4 && +; RUN: grep lqa %t1.s | count 5 && ; RUN: grep lqd %t1.s | count 6 && ; RUN: grep dispatch_tab %t1.s | count 10 -; RUN: grep bisl %t2.s | count 6 && -; RUN: grep ilhu %t2.s | count 1 && -; RUN: grep iohl %t2.s | count 1 && -; RUN: grep rotqby %t2.s | count 5 && +; RUN: grep bisl %t2.s | count 7 && +; RUN: grep ilhu %t2.s | count 2 && +; RUN: grep iohl %t2.s | count 2 && +; RUN: grep rotqby %t2.s | count 6 && ; RUN: grep lqd %t2.s | count 12 && -; RUN: grep lqx %t2.s | count 6 && -; RUN: grep il %t2.s | count 7 && +; RUN: grep lqx %t2.s | count 8 && +; RUN: grep il %t2.s | count 9 && ; RUN: grep ai %t2.s | count 5 && ; RUN: grep dispatch_tab %t2.s | count 7 @@ -38,3 +38,13 @@ entry: tail call void %tmp2.5( i32 %i_arg, float %f_arg ) ret void } + +@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4 +@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16 + +define void @double_indirect_call() { + %a = load void ()*** @ptr.a, align 16 + 
%b = load void ()** %a, align 4 + tail call void %b() + ret void +} diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll new file mode 100644 index 00000000000..122e3035719 --- /dev/null +++ b/test/CodeGen/CellSPU/mul_ops.ll @@ -0,0 +1,90 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep mpy %t1.s | count 44 && +; RUN: grep mpyu %t1.s | count 4 && +; RUN: grep mpyh %t1.s | count 10 && +; RUN: grep mpyhh %t1.s | count 2 && +; RUN: grep rotma %t1.s | count 12 && +; RUN: grep rotmahi %t1.s | count 4 && +; RUN: grep and %t1.s | count 2 && +; RUN: grep selb %t1.s | count 6 && +; RUN: grep fsmbi %t1.s | count 4 && +; RUN: grep shli %t1.s | count 4 && +; RUN: grep shlhi %t1.s | count 4 && +; RUN: grep ila %t1.s | count 2 && +; RUN: grep xsbh %t1.s | count 8 && +; RUN: grep xshw %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; 32-bit multiply instruction generation: +define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { +entry: + %A = mul <4 x i32> %arg1, %arg2 + ret <4 x i32> %A +} + +define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { +entry: + %A = mul <4 x i32> %arg2, %arg1 + ret <4 x i32> %A +} + +define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { +entry: + %A = mul <8 x i16> %arg1, %arg2 + ret <8 x i16> %A +} + +define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { +entry: + %A = mul <8 x i16> %arg2, %arg1 + ret <8 x i16> %A +} + +define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { +entry: + %A = mul <16 x i8> %arg2, %arg1 + ret <16 x i8> %A +} + +define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { +entry: + %A = mul <16 x i8> %arg1, %arg2 + ret <16 x i8> %A +} + +define i32 @mul_i32_1(i32 %arg1, i32 %arg2) { +entry: + %A = mul i32 %arg2, %arg1 + ret i32 %A +} + +define i32 @mul_i32_2(i32 %arg1, i32 %arg2) { +entry: + %A = mul i32 %arg1, %arg2 + ret i32 %A +} + +define i16 @mul_i16_1(i16 %arg1, i16 %arg2) { +entry: + %A = mul i16 %arg2, %arg1 + ret i16 %A +} + +define i16 @mul_i16_2(i16 %arg1, i16 %arg2) { +entry: + %A = mul i16 %arg1, %arg2 + ret i16 %A +} + +define i8 @mul_i8_1(i8 %arg1, i8 %arg2) { +entry: + %A = mul i8 %arg2, %arg1 + ret i8 %A +} + +define i8 @mul_i8_2(i8 %arg1, i8 %arg2) { +entry: + %A = mul i8 %arg1, %arg2 + ret i8 %A +} diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll index a28520cf4ca..b0286d1bdc9 100644 --- a/test/CodeGen/CellSPU/struct_1.ll +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -16,9 +16,10 @@ ; RUN: grep rotqbyi %t2.s | count 5 && ; RUN: grep xshw %t2.s | count 1 && ; RUN: grep andi %t2.s | count 4 && -; RUN: grep cbd %t2.s | count 3 && -; RUN: grep chd %t2.s | count 1 && -; RUN: grep cwd %t2.s | count 3 && +; RUN: grep cbx %t2.s | count 3 && +; RUN: grep chx %t2.s | count 1 && +; RUN: grep cwx %t2.s | count 1 && +; RUN: grep cwd %t2.s | count 2 && ; RUN: grep shufb %t2.s | count 7 && ; RUN: grep stqx %t2.s | count 7
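
For readers trying out the A-form change described at the top of this commit, here is a minimal, hypothetical IR sketch in the style of the existing CellSPU tests. It is not part of this patch: the file contents, RUN lines, and the expectation that a plain global load selects to an "lqa" are assumptions drawn from the commit message and from the increased lqa counts in call_indirect.ll.

; Hypothetical example (not in this patch): with the default (non-large_mem)
; memory model, the address of @counter should now be wrapped in a
; SPUISD::AFormAddr node and the load selected to an A-form quadword
; load ("lqa"). Grep count deliberately omitted; this is illustrative only.
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep lqa %t1.s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"

@counter = global i32 0, align 16

define i32 @read_counter() {
entry:
        %v = load i32* @counter, align 16
        ret i32 %v
}

Under -mattr=large_mem the same address would instead be wrapped in SPUISD::XFormAddr and materialized with an ilhu/iohl pair, as the modified LowerGlobalAddress shows.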