Add X86FastISel support for return statements. This entails refactoring...

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 333907c4fb8516110aba5ddf527c4de85c0b28ec..8f82c7498a22dbb999aa9af126de9288459dce27 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -67,6 +67,11 @@ ARMInterworking("arm-interworking", cl::Hidden,
    cl::desc("Enable / disable ARM interworking (for debugging only)"),
    cl::init(true));
  
+static cl::opt<bool>
+EnableARMCodePlacement("arm-code-placement", cl::Hidden,
+  cl::desc("Enable code placement pass for ARM."),
+  cl::init(false));
+
  static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
@@ -441,6 +446,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
      setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
      setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+    // Since the libcalls include locking, fold in the fences
+    setShouldFoldAtomicFences(true);
    }
    // 64-bit versions are always libcalls (for now)
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
@@ -452,11 +459,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
  
-  // If the subtarget does not have extract instructions, sign_extend_inreg
-  // needs to be expanded. Extract is available in ARM mode on v6 and up,
-  // and on most Thumb2 implementations.
-  if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
-      || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
+  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+  if (!Subtarget->hasV6Ops()) {
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
    }
@@ -469,6 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  
    // We want to custom lower some of our intrinsics.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  if (Subtarget->isTargetDarwin()) {
+    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+  }
  
    setOperationAction(ISD::SETCC,     MVT::i32, Expand);
    setOperationAction(ISD::SETCC,     MVT::f32, Expand);
@@ -529,28 +537,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    else
      setSchedulingPreference(Sched::Hybrid);
  
-  // FIXME: If-converter should use instruction latency to determine
-  // profitability rather than relying on fixed limits.
-  if (Subtarget->getCPUString() == "generic") {
-    // Generic (and overly aggressive) if-conversion limits.
-    setIfCvtBlockSizeLimit(10);
-    setIfCvtDupBlockSizeLimit(2);
-  } else if (Subtarget->hasV7Ops()) {
-    setIfCvtBlockSizeLimit(3);
-    setIfCvtDupBlockSizeLimit(1);
-  } else if (Subtarget->hasV6Ops()) {
-    setIfCvtBlockSizeLimit(2);
-    setIfCvtDupBlockSizeLimit(1);
-  } else {
-    setIfCvtBlockSizeLimit(3);
-    setIfCvtDupBlockSizeLimit(2);
-  }
-
    maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
-  // Do not enable CodePlacementOpt for now: it currently runs after the
-  // ARMConstantIslandPass and messes up branch relaxation and placement
-  // of constant islands.
-  // benefitFromCodePlacementOpt = true;
+
+  if (EnableARMCodePlacement)
+    benefitFromCodePlacementOpt = true;
  }
  
  const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -661,7 +651,7 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  
  /// getFunctionAlignment - Return the Log2 alignment of this function.
  unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
-  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
+  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
  }
  
  Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
@@ -1040,6 +1030,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool &isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                             const SmallVectorImpl<SDValue> &OutVals,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const {
@@ -1053,7 +1044,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
      // Check if it's really possible to do a tail call.
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                      isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
-                                                   Outs, Ins, DAG);
+                                                   Outs, OutVals, Ins, DAG);
      // We don't support GuaranteedTailCallOpt for ARM, only automatically
      // detected sibcalls.
      if (isTailCall) {
@@ -1093,7 +1084,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         i != e;
         ++i, ++realArgIdx) {
      CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = Outs[realArgIdx].Val;
+    SDValue Arg = OutVals[realArgIdx];
      ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
  
      // Promote the value if needed.
@@ -1252,7 +1243,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
        Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                             getPointerTy(), Callee, PICLabel);
      } else
-      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      isDirect = true;
      bool isStub = Subtarget->isTargetDarwin() &&
@@ -1387,6 +1378,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                       bool isCalleeStructRet,
                                                       bool isCallerStructRet,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                       SelectionDAG& DAG) const {
    const Function *CallerF = DAG.getMachineFunction().getFunction();
@@ -1406,29 +1398,26 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
    if (isCalleeStructRet || isCallerStructRet)
      return false;
  
-  // FIXME: Completely disable sibcal for Thumb1 since Thumb1RegisterInfo::
+  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
    // emitEpilogue is not ready for them.
    if (Subtarget->isThumb1Only())
      return false;
  
+  // For the moment, we can only do this to functions defined in this
+  // compilation, or to indirect calls.  A Thumb B to an ARM function,
+  // or vice versa, is not easily fixed up in the linker unlike BL.
+  // (We could do this by loading the address of the callee into a register;
+  // that is an extra instruction over the direct call and burns a register
+  // as well, so is not likely to be a win.)
    if (isa<ExternalSymbolSDNode>(Callee))
        return false;
  
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    if (Subtarget->isThumb1Only())
+    const GlobalValue *GV = G->getGlobal();
+    if (GV->isDeclaration() || GV->isWeakForLinker())
        return false;
-
-    // On Thumb, for the moment, we can only do this to functions defined in this
-    // compilation, or to indirect calls.  A Thumb B to an ARM function is not
-    // easily fixed up in the linker, unlike BL.
-    if (Subtarget->isThumb()) {
-      const GlobalValue *GV = G->getGlobal();
-      if (GV->isDeclaration() || GV->isWeakForLinker())
-        return false;
-    }
    }
  
-
    // If the calling conventions do not match, then we'd better make sure the
    // results are returned in the same way as what the caller expects.
    if (!CCMatch) {
@@ -1483,7 +1472,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
             ++i, ++realArgIdx) {
          CCValAssign &VA = ArgLocs[i];
          EVT RegVT = VA.getLocVT();
-        SDValue Arg = Outs[realArgIdx].Val;
+        SDValue Arg = OutVals[realArgIdx];
          ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
          if (VA.getLocInfo() == CCValAssign::Indirect)
            return false;
@@ -1518,6 +1507,7 @@ SDValue
  ARMTargetLowering::LowerReturn(SDValue Chain,
                                 CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {
  
    // CCValAssign - represent the assignment of the return value to a location.
@@ -1548,7 +1538,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
  
-    SDValue Arg = Outs[realRVLocIdx].Val;
+    SDValue Arg = OutVals[realRVLocIdx];
  
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!");
@@ -1786,7 +1776,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
      // pair. This is always cheaper.
      if (Subtarget->useMovt()) {
        return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
-                         DAG.getTargetGlobalAddress(GV, PtrVT));
+                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));
      } else {
        SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
        CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1903,7 +1893,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
        DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                    PseudoSourceValue::getConstantPool(), 0,
                    false, false, 0);
-    SDValue Chain = Result.getValue(1);
  
      if (RelocM == Reloc::PIC_) {
        SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2014,7 +2003,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
    SDValue ArgValue2;
    if (NextVA.isMemLoc()) {
      MachineFrameInfo *MFI = MF.getFrameInfo();
-    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
+    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
  
      // Create load node to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -2070,8 +2059,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
            VA = ArgLocs[++i]; // skip ahead to next loc
            SDValue ArgValue2;
            if (VA.isMemLoc()) {
-            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
-                                            true, false);
+            int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
              SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
              ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                      PseudoSourceValue::getFixedStack(FI), 0,
@@ -2138,8 +2126,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
        assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
  
        unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
-      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
-                                      true, false);
+      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
  
        // Create load nodes to retrieve arguments from the stack.
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -2170,7 +2157,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
        AFI->setVarArgsFrameIndex(
          MFI->CreateFixedObject(VARegSaveSize,
                                 ArgOffset + VARegSaveSize - VARegSize,
-                               true, false));
+                               true));
        SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                        getPointerTy());
  
@@ -2197,8 +2184,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
                              &MemOps[0], MemOps.size());
      } else
        // This will point to the next argument passed via stack.
-      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
-                                                       true, false));
+      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
    }
  
    return Chain;
@@ -3658,7 +3644,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
    MF->insert(It, loop1MBB);
    MF->insert(It, loop2MBB);
    MF->insert(It, exitMBB);
-  exitMBB->transferSuccessors(BB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
    //  thisMBB:
    //   ...
@@ -3696,7 +3687,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
    //   ...
    BB = exitMBB;
  
-  MF->DeleteMachineInstr(MI);   // The instruction is gone now.
+  MI->eraseFromParent();   // The instruction is gone now.
  
    return BB;
  }
@@ -3739,7 +3730,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    MF->insert(It, loopMBB);
    MF->insert(It, exitMBB);
-  exitMBB->transferSuccessors(BB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
@@ -3784,7 +3780,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    //   ...
    BB = exitMBB;
  
-  MF->DeleteMachineInstr(MI);   // The instruction is gone now.
+  MI->eraseFromParent();   // The instruction is gone now.
  
    return BB;
  }
@@ -3869,22 +3865,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineFunction *F = BB->getParent();
      MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
      MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
-    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
-      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
      F->insert(It, copy0MBB);
      F->insert(It, sinkMBB);
-    // Update machine-CFG edges by first adding all successors of the current
-    // block to the new block which will contain the Phi node for the select.
-    for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), 
-           E = BB->succ_end(); I != E; ++I)
-      sinkMBB->addSuccessor(*I);
-    // Next, remove all successors of the current block, and add the true
-    // and fallthrough blocks as its successors.
-    while (!BB->succ_empty())
-      BB->removeSuccessor(BB->succ_begin());
+
+    // Transfer the remainder of BB and its successor edges to sinkMBB.
+    sinkMBB->splice(sinkMBB->begin(), BB,
+                    llvm::next(MachineBasicBlock::iterator(MI)),
+                    BB->end());
+    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
      BB->addSuccessor(copy0MBB);
      BB->addSuccessor(sinkMBB);
  
+    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
      //  copy0MBB:
      //   %FalseValue = ...
      //   # fallthrough to sinkMBB
@@ -3897,11 +3892,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
      //  ...
      BB = sinkMBB;
-    BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+    BuildMI(*BB, BB->begin(), dl,
+            TII->get(ARM::PHI), MI->getOperand(0).getReg())
        .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
        .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  
-    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+    MI->eraseFromParent();   // The pseudo instruction is gone now.
      return BB;
    }
  
@@ -3922,7 +3918,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
        const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
        unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
          ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
-      BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+      BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
          .addReg(SrcReg, getKillRegState(SrcIsKill));
      }
  
@@ -3954,7 +3950,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
        NeedPred = true; NeedCC = true; NeedOp3 = true;
        break;
      }
-    MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
      if (OpOpc == ARM::tAND)
        AddDefaultT1CC(MIB);
      MIB.addReg(ARM::SP);
@@ -3970,10 +3966,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
      unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
        ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
-    BuildMI(BB, dl, TII->get(CopyOpc))
+    BuildMI(*BB, MI, dl, TII->get(CopyOpc))
        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
        .addReg(ARM::SP);
-    MF->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+    MI->eraseFromParent();   // The pseudo instruction is gone now.
      return BB;
    }
    }
@@ -4538,14 +4534,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
    if (!Subtarget->hasV6Ops())
      // Pre-v6 does not support unaligned mem access.
      return false;
-  else {
-    // v6+ may or may not support unaligned mem access depending on the system
-    // configuration.
-    // FIXME: This is pretty conservative. Should we provide cmdline option to
-    // control the behaviour?
-    if (!Subtarget->isTargetDarwin())
-      return false;
-  }
+
+  // v6+ may or may not support unaligned mem access depending on the system
+  // configuration.
+  // FIXME: This is pretty conservative. Should we provide cmdline option to
+  // control the behaviour?
+  if (!Subtarget->isTargetDarwin())
+    return false;
  
    switch (VT.getSimpleVT().SimpleTy) {
    default:
@@ -5051,7 +5046,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
  /// vector.  If it is invalid, don't add anything to Ops.
  void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                       char Constraint,
-                                                     bool hasMemory,
                                                       std::vector<SDValue>&Ops,
                                                       SelectionDAG &DAG) const {
    SDValue Result(0, 0);
@@ -5200,8 +5194,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
      Ops.push_back(Result);
      return;
    }
-  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
-                                                      Ops, DAG);
+  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
  }
  
  bool