Lower thumbv4t & thumbv5 lo->lo copies through a push-pop sequence

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 81e15719154e3ad8ecbb877a6990453ca7864cd6..24992c7d6f2fa23ff35e4cc4727fd7ed928fa8e0 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -267,10 +267,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: double __aeabi_drsub(double x, double y) (rsub)
        { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
  
        // Double-precision floating-point comparison helper functions
        // RTABI chapter 4.1.2, Table 3
+      // FIXME: void __aeabi_cdcmpeq(double, double)
+      // FIXME: void __aeabi_cdcmple(double, double)
+      // FIXME: void __aeabi_cdrcmple(double, double)
        { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
        { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
        { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
@@ -285,10 +289,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: void __aeabi_frsub(float x, float y)
        { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
  
        // Single-precision floating-point comparison helper functions
        // RTABI chapter 4.1.2, Table 5
+      // FIXME: void __aeabi_cfcmpeq(float, float)
+      // FIXME: void __aeabi_cfcmple(float, float)
+      // FIXME: void __aeabi_cfrcmple(float, float)
        { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
        { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
        { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
@@ -313,7 +321,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        // RTABI chapter 4.1.2, Table 7
        { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: float __aeabi_f2f(short)
+      // FIXME: float __aeabi_h2f_alt(short)
+      // FIXME: short __aeabi_f2h(float)
+      // FIXME: short __aeabi_f2h_alt(float)
        { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: short __aeabi_d2h_alt(double)
  
        // Integer to floating-point conversions.
        // RTABI chapter 4.1.2, Table 8
@@ -338,6 +351,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        { RTLIB::SHL_I64,     "__aeabi_llsl",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::SRL_I64,     "__aeabi_llsr",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::SRA_I64,     "__aeabi_lasr",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: int __aeabi_lcmp(long long, long long)
+      // FIXME: int __aeabi_ulcmp(unsigned long long, unsigned long long)
  
        // Integer division functions
        // RTABI chapter 4.3.1
@@ -356,9 +371,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  
        // Memory operations
        // RTABI chapter 4.3.4
+      // FIXME: void __aeabi_memcpy8(void *, const void *, size_t)
+      // FIXME: void __aeabi_memcpy4(void *, const void *, size_t)
        { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: void __aeabi_memmove8(void *, const void *, size_t)
+      // FIXME: void __aeabi_memmove4(void *, const void *, size_t)
        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: void __aeabi_memset8(void *, size_t, int)
+      // FIXME: void __aeabi_memset4(void *, size_t, int)
        { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      // FIXME: void __aeabi_memclr8(void *, size_t)
+      // FIXME: void __aeabi_memclr4(void *, size_t)
+      // FIXME: void __aeabi_memclr(void *, size_t)
      };
  
      for (const auto &LC : LibraryCalls) {
@@ -1973,6 +1997,19 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
    if (Subtarget->isThumb1Only())
      return false;
  
+  // Externally-defined functions with weak linkage should not be
+  // tail-called on ARM when the OS does not support dynamic
+  // pre-emption of symbols, as the AAELF spec requires normal calls
+  // to undefined weak functions to be replaced with a NOP or jump to the
+  // next instruction. The behaviour of branch instructions in this
+  // situation (as used for tail calls) is implementation-defined, so we
+  // cannot rely on the linker replacing the tail call with a return.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    const GlobalValue *GV = G->getGlobal();
+    if (GV->hasExternalWeakLinkage())
+      return false;
+  }
+
    // If the calling conventions do not match, then we'd better make sure the
    // results are returned in the same way as what the caller expects.
    if (!CCMatch) {
@@ -2605,9 +2642,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
    switch (IntNo) {
    default: return SDValue();    // Don't custom lower most intrinsics.
    case Intrinsic::arm_rbit: {
-    assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+    assert(Op.getOperand(1).getValueType() == MVT::i32 &&
             "RBIT intrinsic must have i32 type!");
-    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+    return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
    }
    case Intrinsic::arm_thread_pointer: {
      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -10848,8 +10885,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                           AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
    Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
-  bool IsAcquire =
-      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+  bool IsAcquire = isAtLeastAcquire(Ord);
  
    // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
    // intrinsic must return {i32, i32} and we have to recombine them into a
@@ -10885,8 +10921,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                                 Value *Addr,
                                                 AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  bool IsRelease =
-      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+  bool IsRelease = isAtLeastRelease(Ord);
  
    // Since the intrinsics must have legal type, the i64 intrinsics take two
    // parameters: "i32, i32". We must marshal Val into the appropriate form