[X86] Better support for the MCU psABI (LLVM part)

author Michael Kuperstein <michael.m.kuperstein@intel.com>

Mon, 28 Dec 2015 14:39:21 +0000 (14:39 +0000)

committer Michael Kuperstein <michael.m.kuperstein@intel.com>

Mon, 28 Dec 2015 14:39:21 +0000 (14:39 +0000)
author Michael Kuperstein <michael.m.kuperstein@intel.com>
Mon, 28 Dec 2015 14:39:21 +0000 (14:39 +0000)
committer Michael Kuperstein <michael.m.kuperstein@intel.com>
Mon, 28 Dec 2015 14:39:21 +0000 (14:39 +0000)
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h

index 9d4e7a04d905a72ac836c68d321736767d9e0a2a..0c6c1f1468c43e9e832de274e324d9ee1153a6a3 100644 (file)
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -46,6 +46,8 @@ namespace ISD {
      static const uint64_t SplitOffs      = 11;
      static const uint64_t InAlloca       = 1ULL<<12; ///< Passed with inalloca
      static const uint64_t InAllocaOffs   = 12;
+    static const uint64_t SplitEnd       = 1ULL<<13; ///< Last part of a split
+    static const uint64_t SplitEndOffs   = 13;
      static const uint64_t OrigAlign      = 0x1FULL<<27;
      static const uint64_t OrigAlignOffs  = 27;
      static const uint64_t ByValSize      = 0x3fffffffULL<<32; ///< Struct size
@@ -103,6 +105,9 @@ namespace ISD {
      bool isSplit()   const { return Flags & Split; }
      void setSplit()  { Flags |= One << SplitOffs; }
  
+    bool isSplitEnd()   const { return Flags & SplitEnd; }
+    void setSplitEnd()  { Flags |= One << SplitEndOffs; }
+
      unsigned getOrigAlign() const {
        return (unsigned)
          ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2);
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index a30b060622aca94c651771d7a1d2c0f237068105..140c36591acc8c808debc0fac32063d61623235e 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2453,13 +2453,6 @@ public:
  
    };
  
-  // Mark inreg arguments for lib-calls. For normal calls this is done by
-  // the frontend ABI code.
-  virtual void markInRegArguments(SelectionDAG &DAG, 
-                 TargetLowering::ArgListTy &Args) const {
-    return;
-  }
-
    /// This function lowers an abstract call to a function into an actual call.
    /// This returns a pair of operands.  The first element is the return value
    /// for the function (if RetTy is not VoidTy).  The second element is the
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index b15b4bd1cf3019e13a8027e57de82b6c88dee4eb..d2ea85ab4d2285b48cd44d2f02e1a497ae537eab 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7145,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                                 i, j*Parts[j].getValueType().getStoreSize());
          if (NumParts > 1 && j == 0)
            MyFlags.Flags.setSplit();
-        else if (j != 0)
+        else if (j != 0) {
            MyFlags.Flags.setOrigAlign(1);
+          if (j == NumParts - 1)
+            MyFlags.Flags.setSplitEnd();
+        }
  
          CLI.Outs.push_back(MyFlags);
          CLI.OutVals.push_back(Parts[j]);
@@ -7390,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
          if (NumRegs > 1 && i == 0)
            MyFlags.Flags.setSplit();
          // if it isn't first piece, alignment must be 1
-        else if (i > 0)
+        else if (i > 0) {
            MyFlags.Flags.setOrigAlign(1);
+          if (i == NumRegs - 1)
+            MyFlags.Flags.setSplitEnd();
+        }
          Ins.push_back(MyFlags);
        }
        if (NeedsRegBlock && Value == NumValues - 1)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index bb31231f4e1ad7bb5ada6795622b2f0943855621..9b74a48d7ea3778ec012c08441dfbb179516789e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -101,8 +101,6 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
      Args.push_back(Entry);
    }
  
-  markInRegArguments(DAG, Args);
-
    if (LC == RTLIB::UNKNOWN_LIBCALL)
      report_fatal_error("Unsupported library call operation!");
    SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h

index 0eb2494f1d639fd380f7ec5cf6c2dda56d9f3cbe..a08160f9febabbe6c3b5ee1562af688e3aa6df9c 100644 (file)
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -15,6 +15,7 @@
  #ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
  #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
  
+#include "MCTargetDesc/X86MCTargetDesc.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/IR/CallingConv.h"
  
@@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
    return false;
  }
  
+inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
+                                         MVT &LocVT,
+                                         CCValAssign::LocInfo &LocInfo,
+                                         ISD::ArgFlagsTy &ArgFlags,
+                                         CCState &State) {
+  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+  // not to split i64 and double between a register and the stack.
+  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+  static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
+  
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // If this is the first part of a double/i64/i128, or if we're already
+  // in the middle of a split, add to the pending list. If this is not
+  // the end of the split, return; otherwise go on to process the pending
+  // list.
+  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+    PendingMembers.push_back(
+        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+    if (!ArgFlags.isSplitEnd())
+      return true; // Part recorded as pending; locations assigned later.
+  }
+
+  // If there are no pending members, we are not in the middle of a split,
+  // so do the usual inreg stuff.
+  if (PendingMembers.empty()) {
+    if (unsigned Reg = State.AllocateReg(RegList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return true;
+    }
+    return false; // No register from RegList could be allocated.
+  }
+
+  assert(ArgFlags.isSplitEnd());
+
+  // We now have the entire original argument in PendingMembers, so decide
+  // whether to use registers or the stack.
+  // Per the MCU ABI:
+  // a) To use registers, we need to have enough of them free to contain
+  // the entire argument.
+  // b) We never want to use more than 2 registers for a single argument.
+
+  unsigned FirstFree = State.getFirstUnallocated(RegList);
+  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+  for (auto &It : PendingMembers) {
+    if (UseRegs)
+      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+    else
+      It.convertToMem(State.AllocateStack(4, 4));
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();
+
+  return true;
+}
+
  } // End llvm namespace
  
  #endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td

index 26100a56ee6bc0779b59b281a3ea6f0f3905178c..54d88cbb244eb9937bffcec1d0ed85ed90c15196 100644 (file)
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -592,6 +592,23 @@ def CC_X86_32_C : CallingConv<[
    CCDelegateTo<CC_X86_32_Common>
  ]>;
  
+def CC_X86_32_MCU : CallingConv<[
+  // Handles byval parameters.  Note that, like FastCC, we can't rely on
+  // the delegation to CC_X86_32_Common because that happens after code that
+  // puts arguments in registers.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  // Promote i1/i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // If the call is not a vararg call, some arguments may be passed
+  // in integer registers.
+  CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>,
+
+  // Otherwise, same as everything else.
+  CCDelegateTo<CC_X86_32_Common>
+]>;
+
  def CC_X86_32_FastCall : CallingConv<[
    // Promote i1/i8/i16 arguments to i32.
    CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -753,6 +770,7 @@ def CC_X86_64_Intr : CallingConv<[
  
  // This is the root argument convention for the X86-32 backend.
  def CC_X86_32 : CallingConv<[
+  CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
    CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
    CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
    CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp

index 914fd04ad6b7f7cfaa18ae284f17a2988487c6ef..de94a138d865e51962a443d4ace8cafb7476abc9 100644 (file)
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1098,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
      RetRegs.push_back(VA.getLocReg());
    }
  
-  // The x86-64 ABI for returning structs by value requires that we copy
-  // the sret argument into %rax for the return. We saved the argument into
-  // a virtual register in the entry block, so now we copy the value out
-  // and into %rax. We also do the same with %eax for Win32.
-  if (F.hasStructRetAttr() &&
-      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+  // All x86 ABIs require that for returning structs by value we copy
+  // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // We saved the argument into a virtual register in the entry block,
+  // so now we copy the value out and into %rax/%eax.
+  if (F.hasStructRetAttr()) {
      unsigned Reg = X86MFInfo->getSRetReturnReg();
      assert(Reg &&
             "SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -2820,7 +2819,7 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
  
    if (CS)
      if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
-        CS->paramHasAttr(1, Attribute::InReg))
+        CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
        return 0;
  
    return 4;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 182a8cd3e8ae5a5e2b8af85211ed9542bb745c15..e24bec7156e64d6c01bf706bd8e56ca445adc187 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2447,28 +2447,28 @@ enum StructReturnType {
    StackStructReturn
  };
  static StructReturnType
-callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
    if (Outs.empty())
      return NotStructReturn;
  
    const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
    if (!Flags.isSRet())
      return NotStructReturn;
-  if (Flags.isInReg())
+  if (Flags.isInReg() || IsMCU)
      return RegStructReturn;
    return StackStructReturn;
  }
  
  /// Determines whether a function uses struct return semantics.
  static StructReturnType
-argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
    if (Ins.empty())
      return NotStructReturn;
  
    const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
    if (!Flags.isSRet())
      return NotStructReturn;
-  if (Flags.isInReg())
+  if (Flags.isInReg() || IsMCU)
      return RegStructReturn;
    return StackStructReturn;
  }
@@ -2945,7 +2945,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
      // If this is an sret function, the return should pop the hidden pointer.
      if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
          !Subtarget->getTargetTriple().isOSMSVCRT() &&
-        argsAreStructReturn(Ins) == StackStructReturn)
+        argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
        FuncInfo->setBytesToPopOnReturn(4);
    }
  
@@ -3065,7 +3065,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    MachineFunction &MF = DAG.getMachineFunction();
    bool Is64Bit        = Subtarget->is64Bit();
    bool IsWin64        = Subtarget->isCallingConvWin64(CallConv);
-  StructReturnType SR = callIsStructReturn(Outs);
+  StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
    bool IsSibcall      = false;
    X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
    auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
@@ -28661,27 +28661,3 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
                                     Attribute::MinSize);
    return OptSize && !VT.isVector();
  }
-
-void X86TargetLowering::markInRegArguments(SelectionDAG &DAG,
-       TargetLowering::ArgListTy& Args) const {
-  // The MCU psABI requires some arguments to be passed in-register.
-  // For regular calls, the inreg arguments are marked by the front-end.
-  // However, for compiler generated library calls, we have to patch this
-  // up here.
-  if (!Subtarget->isTargetMCU() || !Args.size())
-    return;
-
-  unsigned FreeRegs = 3;
-  for (auto &Arg : Args) {
-    // For library functions, we do not expect any fancy types.
-    unsigned Size = DAG.getDataLayout().getTypeSizeInBits(Arg.Ty);
-    unsigned SizeInRegs = (Size + 31) / 32;
-    if (SizeInRegs > 2 || SizeInRegs > FreeRegs)
-      continue;
-
-    Arg.isInReg = true;
-    FreeRegs -= SizeInRegs;
-    if (!FreeRegs)
-      break;
-  }
-}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 00e83a3c6529b050e3608e9ab9a9a3869fcda1e5..a29dc9af54f6c778717a57c4c9a781c1c22d0738 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -927,9 +927,6 @@ namespace llvm {
  
      bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
  
-    void markInRegArguments(SelectionDAG &DAG, TargetLowering::ArgListTy& Args)
-      const override;
-
    protected:
      std::pair<const TargetRegisterClass *, uint8_t>
      findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/test/CodeGen/X86/mcu-abi.ll b/test/CodeGen/X86/mcu-abi.ll

index 6dc2c993032012306121cac6f6e8b3839baead90..966fd4521f2d613c982fc95b1fbf0fba45edfce3 100644 (file)
--- a/test/CodeGen/X86/mcu-abi.ll
+++ b/test/CodeGen/X86/mcu-abi.ll
@@ -1,11 +1,112 @@
  ; RUN: llc < %s -mtriple=i686-pc-elfiamcu | FileCheck %s
  
+%struct.st12_t = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+; CHECK-LABEL: test_ints:
+; CHECK: addl    %edx, %eax
+; CHECK-NEXT: imull   %ecx, %eax
+; CHECK-NEXT: addl    4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_ints(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+entry:
+  %r1 = add i32 %b, %a
+  %r2 = mul i32 %c, %r1
+  %r3 = add i32 %d, %r2
+  ret i32 %r3
+}
+
+; CHECK-LABEL: test_floats:
+; CHECK: addl    %edx, %eax
+; CHECK-NEXT: imull   %ecx, %eax
+; CHECK-NEXT: addl    4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_floats(i32 %a, i32 %b, float %c, float %d) #0 {
+entry:
+  %ci = bitcast float %c to i32
+  %di = bitcast float %d to i32
+  %r1 = add i32 %b, %a
+  %r2 = mul i32 %ci, %r1
+  %r3 = add i32 %di, %r2
+  ret i32 %r3
+}
+
+; CHECK-LABEL: test_doubles:
+; CHECK: addl    4(%esp), %eax
+; CHECK-NEXT: adcl    8(%esp), %edx
+; CHECK-NEXT: retl
+define double @test_doubles(double %d1, double %d2) #0 {
+entry:
+    %d1i = bitcast double %d1 to i64
+    %d2i = bitcast double %d2 to i64
+    %r = add i64 %d1i, %d2i
+    %rd = bitcast i64 %r to double
+    ret double %rd
+}
+
+; CHECK-LABEL: test_mixed_doubles:
+; CHECK: addl    %ecx, %eax
+; CHECK-NEXT: adcl    $0, %edx
+; CHECK-NEXT: retl
+define double @test_mixed_doubles(double %d2, i32 %i) #0 {
+entry:
+    %iext = zext i32 %i to i64
+    %d2i = bitcast double %d2 to i64
+    %r = add i64 %iext, %d2i
+    %rd = bitcast i64 %r to double
+    ret double %rd
+}
+
+; CHECK-LABEL: ret_large_struct:
+; CHECK: pushl   %esi
+; CHECK-NEXT: movl    %eax, %esi
+; CHECK-NEXT: leal    8(%esp), %edx
+; CHECK-NEXT: movl    $48, %ecx
+; CHECK-NEXT: calll   memcpy
+; CHECK-NEXT: movl    %esi, %eax
+; CHECK-NEXT: popl    %esi
+; CHECK-NOT:  retl $4
+; CHECK-NEXT: retl
+define void @ret_large_struct(%struct.st12_t* noalias nocapture sret %agg.result, %struct.st12_t* byval nocapture readonly align 4 %r) #0 {
+entry:
+  %0 = bitcast %struct.st12_t* %agg.result to i8*
+  %1 = bitcast %struct.st12_t* %r to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 48, i32 1, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: var_args:
+; CHECK: movl    4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @var_args(i32 %i1, ...) #0 {
+entry:
+  ret i32 %i1
+}
+
  ; CHECK-LABEL: test_lib_args:
  ; CHECK: movl %edx, %eax
  ; CHECK: calll __fixsfsi
-define i32 @test_lib_args(float inreg %a, float inreg %b) #0 {
+define i32 @test_lib_args(float %a, float %b) #0 {
    %ret = fptosi float %b to i32
    ret i32 %ret
  }
  
+; CHECK-LABEL: test_fp128:
+; CHECK: movl    (%eax), %e[[CX:..]]
+; CHECK-NEXT: movl    4(%eax), %e[[DX:..]]
+; CHECK-NEXT: movl    8(%eax), %e[[SI:..]]
+; CHECK-NEXT: movl    12(%eax), %e[[AX:..]]
+; CHECK-NEXT: movl    %e[[AX]], 12(%esp)
+; CHECK-NEXT: movl    %e[[SI]], 8(%esp)
+; CHECK-NEXT: movl    %e[[DX]], 4(%esp)
+; CHECK-NEXT: movl    %e[[CX]], (%esp)
+; CHECK-NEXT: calll   __fixtfsi
+define i32 @test_fp128(fp128* %ptr) #0 {
+  %v = load fp128, fp128* %ptr
+  %ret = fptosi fp128 %v to i32
+  ret i32 %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
  attributes #0 = { nounwind "use-soft-float"="true"}
+attributes #1 = { nounwind argmemonly }
author	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Mon, 28 Dec 2015 14:39:21 +0000 (14:39 +0000)
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Mon, 28 Dec 2015 14:39:21 +0000 (14:39 +0000)
include/llvm/Target/TargetCallingConv.h		patch \| blob \| history
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/Target/X86/X86CallingConv.h		patch \| blob \| history
lib/Target/X86/X86CallingConv.td		patch \| blob \| history
lib/Target/X86/X86FastISel.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/mcu-abi.ll		patch \| blob \| history