X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=b82a2209e550bc7f7d15e32ed50efa4d53a44176;hb=235e2f6a68b5f37d6c1b554330eebc8d32f1aca9;hp=c55b7b4a7dc14b8418752780e8eee8ba8ddd9f0a;hpb=8e6c2b90410ae8c8f5a9a89e35de1dcc62840dbf;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c55b7b4a7dc..b82a2209e55 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -17,6 +17,7 @@ #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" +#include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -39,6 +40,7 @@ #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include using namespace llvm; static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, @@ -75,9 +77,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); if (ElemTy == MVT::i8 || ElemTy == MVT::i16) setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + if (ElemTy != MVT::i32) { + setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); + } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); if (VT.isInteger()) { @@ -98,6 +105,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, AddPromotedToType (ISD::XOR, VT.getSimpleVT(), PromotedBitwiseVT.getSimpleVT()); } + + // Neon does not support vector divide/remainder operations. + setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); } void ARMTargetLowering::addDRTypeForNEON(EVT VT) { @@ -236,6 +251,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); + // v2f64 is legal so that QR subregs can be extracted as f64 elements, but + // neither Neon nor VFP support any arithmetic operations on it. + setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + setOperationAction(ISD::FDIV, MVT::v2f64, Expand); + setOperationAction(ISD::FREM, MVT::v2f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); + setOperationAction(ISD::FNEG, MVT::v2f64, Expand); + setOperationAction(ISD::FABS, MVT::v2f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); + setOperationAction(ISD::FSIN, MVT::v2f64, Expand); + setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); + setOperationAction(ISD::FPOW, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); + setOperationAction(ISD::FEXP, MVT::v2f64, Expand); + setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); + setOperationAction(ISD::FRINT, MVT::v2f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + // Neon does not support some operations on v1i64 and v2i64 types. + setOperationAction(ISD::MUL, MVT::v1i64, Expand); + setOperationAction(ISD::MUL, MVT::v2i64, Expand); + setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); + setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); @@ -344,8 +392,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -482,22 +528,19 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; - case ARMISD::VLD2D: return "ARMISD::VLD2D"; - case ARMISD::VLD3D: return "ARMISD::VLD3D"; - case ARMISD::VLD4D: return "ARMISD::VLD4D"; - case ARMISD::VST2D: return "ARMISD::VST2D"; - case ARMISD::VST3D: return "ARMISD::VST3D"; - case ARMISD::VST4D: return "ARMISD::VST4D"; case ARMISD::VEXT: return "ARMISD::VEXT"; case ARMISD::VREV64: return "ARMISD::VREV64"; case ARMISD::VREV32: return "ARMISD::VREV32"; case ARMISD::VREV16: return "ARMISD::VREV16"; + case ARMISD::VZIP: return "ARMISD::VZIP"; + case ARMISD::VUZP: return "ARMISD::VUZP"; + case ARMISD::VTRN: return "ARMISD::VTRN"; } } /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { - return getTargetMachine().getSubtarget().isThumb() ? 1 : 2; + return getTargetMachine().getSubtarget().isThumb() ? 0 : 1; } //===----------------------------------------------------------------------===// @@ -521,12 +564,9 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { } } -/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It -/// returns true if the operands should be inverted to form the proper -/// comparison. -static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. +static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2) { - bool Invert = false; CondCode2 = ARMCC::AL; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); @@ -537,7 +577,7 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; - case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break; + case ISD::SETOLE: CondCode = ARMCC::LS; break; case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; @@ -551,7 +591,6 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETNE: case ISD::SETUNE: CondCode = ARMCC::NE; break; } - return Invert; } //===----------------------------------------------------------------------===// @@ -687,7 +726,7 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, /// CCAssignFnForNode - Selects the correct CCAssignFn for a the /// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const { switch (CC) { @@ -717,7 +756,7 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, /// appropriate copies out of appropriate physical registers. SDValue ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { @@ -848,7 +887,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, /// nodes. SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, @@ -971,8 +1010,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, - ARMCP::CPStub, 4); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -991,8 +1031,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - Sym, ARMPCLabelIndex, - ARMCP::CPStub, 4); + Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -1052,7 +1091,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue ARMTargetLowering::LowerReturn(SDValue Chain, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, DebugLoc dl, SelectionDAG &DAG) { @@ -1169,8 +1208,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, - PCAdj, "tlsgd", true); + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, "tlsgd", true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); @@ -1211,8 +1250,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, // initial exec model unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, - PCAdj, "gottpoff", true); + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, "gottpoff", true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); @@ -1224,8 +1263,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); } else { // local exec model - ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); @@ -1259,52 +1297,40 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT"); + new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - CPAddr, NULL, 0); + CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, + PseudoSourceValue::getGOT(), 0); return Result; } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); } } -/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol -/// even in non-static mode. -static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. - bool isDecl = GV->isDeclaration() || GV->hasAvailableExternallyLinkage(); - if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) - return false; - return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); -} - SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); SDValue CPAddr; if (RelocM == Reloc::Static) CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); else { - unsigned PCAdj = (RelocM != Reloc::PIC_) - ? 0 : (Subtarget->isThumb() ? 4 : 8); - ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr - : ARMCP::CPValue; - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, - Kind, PCAdj); + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1316,7 +1342,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } - if (IsIndirect) + + if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); return Result; @@ -1331,66 +1358,15 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", - ARMPCLabelIndex, - ARMCP::CPValue, PCAdj); + ARMPCLabelIndex, PCAdj); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } -static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG, - unsigned Opcode) { - SDNode *Node = Op.getNode(); - EVT VT = Node->getValueType(0); - DebugLoc dl = Op.getDebugLoc(); - - if (!VT.is64BitVector()) - return SDValue(); // unimplemented - - SDValue Ops[] = { Node->getOperand(0), - Node->getOperand(2) }; - return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2); -} - -static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG, - unsigned Opcode, unsigned NumVecs) { - SDNode *Node = Op.getNode(); - EVT VT = Node->getOperand(3).getValueType(); - DebugLoc dl = Op.getDebugLoc(); - - if (!VT.is64BitVector()) - return SDValue(); // unimplemented - - SmallVector Ops; - Ops.push_back(Node->getOperand(0)); - Ops.push_back(Node->getOperand(2)); - for (unsigned N = 0; N < NumVecs; ++N) - Ops.push_back(Node->getOperand(N + 3)); - return DAG.getNode(Opcode, dl, MVT::Other, Ops.data(), Ops.size()); -} - -SDValue -ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { - unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); - switch (IntNo) { - case Intrinsic::arm_neon_vld2: - return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D); - case Intrinsic::arm_neon_vld3: - return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D); - case Intrinsic::arm_neon_vld4: - return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D); - case Intrinsic::arm_neon_vst2: - return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2); - case Intrinsic::arm_neon_vst3: - return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3); - case Intrinsic::arm_neon_vst4: - return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4); - default: return SDValue(); // Don't custom lower most intrinsics. - } -} - SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); @@ -1402,10 +1378,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::eh_sjlj_lsda: { - // blah. horrible, horrible hack with the forced magic name. - // really need to clean this up. It belongs in the target-independent - // layer somehow that doesn't require the coupling with the asm - // printer. MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); @@ -1413,14 +1385,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue CPAddr; unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); - ARMCP::ARMCPKind Kind = ARMCP::CPValue; - // Save off the LSDA name for the AsmPrinter to use when it's time - // to emit the table - std::string LSDAName = "L_lsda_"; - LSDAName += MF.getFunction()->getName(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(*DAG.getContext(), LSDAName.c_str(), - ARMPCLabelIndex, Kind, PCAdj); + new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, + ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = @@ -1532,7 +1499,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ARMTargetLowering::LowerFormalArguments(SDValue Chain, - unsigned CallConv, bool isVarArg, + CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, @@ -1796,8 +1763,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, } ARMCC::CondCodes CondCode, CondCode2; - if (FPCCToARMCC(CC, CondCode, CondCode2)) - std::swap(TrueVal, FalseVal); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); @@ -1833,9 +1799,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); ARMCC::CondCodes CondCode, CondCode2; - if (FPCCToARMCC(CC, CondCode, CondCode2)) - // Swap the LHS/RHS of the comparison if needed. - std::swap(LHS, RHS); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); @@ -2068,14 +2032,19 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { // will be implemented with the NEON VNEG instruction. However, VNEG does // not support i64 elements, so sometimes the zero vectors will need to be // explicitly constructed. For those cases, and potentially other uses in - // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - if (VT.getSizeInBits() == 64) - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue Cst = DAG.getTargetConstant(0, MVT::i8); + SmallVector Ops; + MVT TVT; + + if (VT.getSizeInBits() == 64) { + Ops.assign(8, Cst); TVT = MVT::v8i8; + } else { + Ops.assign(16, Cst); TVT = MVT::v16i8; + } + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -2085,14 +2054,19 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest - // type. This ensures they get CSE'd. + // Always build ones vectors as <16 x i32> or <8 x i32> bitcasted to their + // dest type. This ensures they get CSE'd. SDValue Vec; - SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); - if (VT.getSizeInBits() == 64) - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); + SmallVector Ops; + MVT TVT; + + if (VT.getSizeInBits() == 64) { + Ops.assign(8, Cst); TVT = MVT::v8i8; + } else { + Ops.assign(16, Cst); TVT = MVT::v16i8; + } + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -2130,8 +2104,11 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, N->getOperand(0), NegatedCount); } - assert(VT == MVT::i64 && - (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + // We can get here for a node like i32 = ISD::SHL i32, i64 + if (VT != MVT::i64) + return SDValue(); + + assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. @@ -2399,6 +2376,53 @@ static bool isVREVMask(const SmallVectorImpl &M, EVT VT, return true; } +static bool isVTRNMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((unsigned) M[i] != i + WhichResult || + (unsigned) M[i+1] != i + NumElts + WhichResult) + return false; + } + return true; +} + +static bool isVUZPMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + if ((unsigned) M[i] != 2 * i + WhichResult) + return false; + } + + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + return false; + + return true; +} + +static bool isVZIPMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((unsigned) M[i] != Idx || + (unsigned) M[i+1] != Idx + NumElts) + return false; + Idx += 1; + } + + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + return false; + + return true; +} + static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Canonicalize all-zeros and all-ones vectors. ConstantSDNode *ConstVal = cast(Val.getNode()); @@ -2446,10 +2470,12 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, DAG); - if (Val.getNode()) - return BuildSplat(Val, VT, DAG, dl); + if (SplatBitSize <= 64) { + SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, DAG); + if (Val.getNode()) + return BuildSplat(Val, VT, DAG, dl); + } } // If there are only 2 elements in a 128-bit vector, insert them into an @@ -2479,20 +2505,114 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { bool ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, EVT VT) const { + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (M[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = M[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return true; + } + bool ReverseVEXT; - unsigned Imm; + unsigned Imm, WhichResult; return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || - isVEXTMask(M, VT, ReverseVEXT, Imm)); + isVEXTMask(M, VT, ReverseVEXT, Imm) || + isVTRNMask(M, VT, WhichResult) || + isVUZPMask(M, VT, WhichResult) || + isVZIPMask(M, VT, WhichResult)); +} + +/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit +/// the specified operations to build the shuffle. +static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + unsigned OpNum = (PFEntry >> 26) & 0x0F; + unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); + unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); + + enum { + OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> + OP_VREV, + OP_VDUP0, + OP_VDUP1, + OP_VDUP2, + OP_VDUP3, + OP_VEXT1, + OP_VEXT2, + OP_VEXT3, + OP_VUZPL, // VUZP, left result + OP_VUZPR, // VUZP, right result + OP_VZIPL, // VZIP, left result + OP_VZIPR, // VZIP, right result + OP_VTRNL, // VTRN, left result + OP_VTRNR // VTRN, right result + }; + + if (OpNum == OP_COPY) { + if (LHSID == (1*9+2)*9+3) return LHS; + assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); + return RHS; + } + + SDValue OpLHS, OpRHS; + OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + EVT VT = OpLHS.getValueType(); + + switch (OpNum) { + default: llvm_unreachable("Unknown shuffle opcode!"); + case OP_VREV: + return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); + case OP_VDUP0: + case OP_VDUP1: + case OP_VDUP2: + case OP_VDUP3: + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, + OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); + case OP_VEXT1: + case OP_VEXT2: + case OP_VEXT3: + return DAG.getNode(ARMISD::VEXT, dl, VT, + OpLHS, OpRHS, + DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); + case OP_VUZPL: + case OP_VUZPR: + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); + case OP_VZIPL: + case OP_VZIPR: + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); + case OP_VTRNL: + case OP_VTRNR: + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); + } } static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - ShuffleVectorSDNode *SVN = cast(Op.getNode()); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); SmallVector ShuffleMask; // Convert shuffles that are directly supported on NEON to target-specific @@ -2505,49 +2625,80 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { int Lane = SVN->getSplatIndex(); - SDValue Op0 = SVN->getOperand(0); - if (Lane == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(ARMISD::VDUP, dl, VT, Op0.getOperand(0)); + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } - return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0), + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } bool ReverseVEXT; unsigned Imm; if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { - SDValue Op0 = SVN->getOperand(0); - SDValue Op1 = SVN->getOperand(1); if (ReverseVEXT) - std::swap(Op0, Op1); - return DAG.getNode(ARMISD::VEXT, dl, VT, Op0, Op1, + std::swap(V1, V2); + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, DAG.getConstant(Imm, MVT::i32)); } if (isVREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0)); + return DAG.getNode(ARMISD::VREV64, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARMISD::VREV32, dl, VT, SVN->getOperand(0)); + return DAG.getNode(ARMISD::VREV32, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARMISD::VREV16, dl, VT, SVN->getOperand(0)); + return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + + // Check for Neon shuffles that modify both input vectors in place. + // If both results are used, i.e., if there are two shuffles with the same + // source operands and with masks corresponding to both results of one of + // these operations, DAG memoization will ensure that a single node is + // used for both shuffles. + unsigned WhichResult; + if (isVTRNMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVUZPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVZIPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (ShuffleMask[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = ShuffleMask[i]; + } - return SDValue(); -} + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); + } -static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { - return Op; + return SDValue(); } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - assert((VT == MVT::i8 || VT == MVT::i16) && - "unexpected type for custom-lowering vector extract"); SDValue Vec = Op.getOperand(0); SDValue Lane = Op.getOperand(1); - Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); - Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); + assert(VT == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && + "unexpected type for custom-lowering vector extract"); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -2591,8 +2742,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); case ISD::SHL: @@ -2601,7 +2750,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); } @@ -2636,7 +2784,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { @@ -2667,12 +2816,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - sinkMBB->addSuccessor(*i); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. - while(!BB->succ_empty()) + while (!BB->succ_empty()) BB->removeSuccessor(BB->succ_begin()); BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -3817,3 +3969,9 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, Ops, DAG); } + +bool +ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The ARM target isn't yet aware of offsets. + return false; +}