From 69585fd51be3cf8ebeec649f1d3ba33e47759a68 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Thu, 13 Aug 2015 13:37:06 +0000
Subject: [PATCH] [SystemZ] Support large LLVM IR struct return values

Recent mesa/llvmpipe crashes on SystemZ due to a failed assertion when
attempting to compile a routine with a return type of
  { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
on a system without vector instruction support.

This is because after legalizing the vector type, we get a return value
consisting of 16 floats, which cannot all be returned in registers.

Usually, what should happen in this case is that the target's CanLowerReturn
routine rejects the return type, in which case SelectionDAG falls back to
implementing a structure return in memory via implicit reference.

However, the SystemZ target never actually implemented any CanLowerReturn
routine, and thus would accept any struct return type.

This patch fixes the crash by implementing CanLowerReturn.  As a side effect,
this also handles fp128 return values, fixing a todo that was noted in
SystemZCallingConv.td.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244889 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/SystemZ/SystemZCallingConv.td   |  4 --
 lib/Target/SystemZ/SystemZISelLowering.cpp | 14 ++++
 lib/Target/SystemZ/SystemZISelLowering.h   |  4 ++
 test/CodeGen/SystemZ/args-04.ll            | 14 ++++
 test/CodeGen/SystemZ/args-07.ll            | 60 ++++++++++++++++
 test/CodeGen/SystemZ/args-08.ll            | 57 +++++++++++++++
 test/CodeGen/SystemZ/vec-args-06.ll        | 83 ++++++++++++++++++++++
 test/CodeGen/SystemZ/vec-args-07.ll        | 47 ++++++++++++
 8 files changed, 279 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/SystemZ/args-07.ll
 create mode 100644 test/CodeGen/SystemZ/args-08.ll
 create mode 100644 test/CodeGen/SystemZ/vec-args-06.ll
 create mode 100644 test/CodeGen/SystemZ/vec-args-07.ll
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index be8f00b57ad..bdd1b1598ad 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[
   CCIfSubtarget<"hasVector()",
     CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
              CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
-
-  // ABI-compliant code returns long double by reference, but that conversion
-  // is left to higher-level code.  Perhaps we could add an f128 definition
-  // here for code that doesn't care about the ABI?
 ]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index a009a33229b..4aac09132ec 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1173,6 +1173,20 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
   return Chain;
 }
 
+bool SystemZTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv,
+               MachineFunction &MF, bool isVarArg,
+               const SmallVectorImpl<ISD::OutputArg> &Outs,
+               LLVMContext &Context) const {
+  // Detect unsupported vector return types.
+  if (Subtarget.hasVector())
+    VerifyVectorTypes(Outs);
+
+  SmallVector<CCValAssign, 16> RetLocs;
+  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
+  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
+}
+
 SDValue
 SystemZTargetLowering::LowerReturn(SDValue Chain,
                                    CallingConv::ID CallConv, bool IsVarArg,
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 949b67f114e..07ff2514458 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -423,6 +423,10 @@ public:
   SDValue LowerCall(CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
 
+  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                      bool isVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      LLVMContext &Context) const override;
   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals,
diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll
index 1178bb4dafd..48a2cf49104 100644
--- a/test/CodeGen/SystemZ/args-04.ll
+++ b/test/CodeGen/SystemZ/args-04.ll
@@ -124,3 +124,17 @@ define void @f13(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
   store fp128 %y, fp128 *%r2
   ret void
 }
+
+; Explicit fp128 return values are likewise passed indirectly.
+define fp128 @f14(fp128 %r3) {
+; CHECK-LABEL: f14:
+; CHECK: ld %f0, 0(%r3)
+; CHECK: ld %f2, 8(%r3)
+; CHECK: axbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %y = fadd fp128 %r3, %r3
+  ret fp128 %y
+}
+
diff --git a/test/CodeGen/SystemZ/args-07.ll b/test/CodeGen/SystemZ/args-07.ll
new file mode 100644
index 00000000000..29d9b319ffc
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-07.ll
@@ -0,0 +1,60 @@
+; Test multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Up to four integer return values fit into GPRs.
+define { i64, i64, i64, i64 } @f1() {
+; CHECK-LABEL: f1:
+; CHECK: lghi %r2, 0
+; CHECK: lghi %r3, 1
+; CHECK: lghi %r4, 2
+; CHECK: lghi %r5, 3
+; CHECK: br %r14
+  ret { i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3 }
+}
+
+; More than four integer return values use sret.
+define { i64, i64, i64, i64, i64 } @f2() {
+; CHECK-LABEL: f2:
+; CHECK: mvghi 32(%r2), 4
+; CHECK: mvghi 24(%r2), 3
+; CHECK: mvghi 16(%r2), 2
+; CHECK: mvghi 8(%r2), 1
+; CHECK: mvghi 0(%r2), 0
+; CHECK: br %r14
+  ret { i64, i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3, i64 4 }
+}
+
+; Up to four floating-point return values fit into FPRs.
+define { double, double, double, double } @f3() {
+; CHECK-LABEL: f3:
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f0, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f2, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f4, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: ldeb %f6, 0([[TMP]])
+; CHECK: br %r14
+  ret { double, double, double, double }
+      { double 1.0, double 2.0, double 3.0, double 4.0 }
+}
+
+; More than four floating-point return values use sret.
+define { double, double, double, double, double } @f4() {
+; CHECK-LABEL: f4:
+; CHECK: llihh [[TMP:%r[0-5]]], 16404
+; CHECK: stg [[TMP]], 32(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16400
+; CHECK: stg [[TMP]], 24(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16392
+; CHECK: stg [[TMP]], 16(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16384
+; CHECK: stg [[TMP]], 8(%r2)
+; CHECK: llihh [[TMP:%r[0-5]]], 16368
+; CHECK: stg [[TMP]], 0(%r2)
+; CHECK: br %r14
+  ret { double, double, double, double, double }
+      { double 1.0, double 2.0, double 3.0, double 4.0, double 5.0 }
+}
diff --git a/test/CodeGen/SystemZ/args-08.ll b/test/CodeGen/SystemZ/args-08.ll
new file mode 100644
index 00000000000..0bad5a8989d
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-08.ll
@@ -0,0 +1,57 @@
+; Test calling functions with multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Up to four integer return values fit into GPRs.
+declare { i64, i64, i64, i64 } @bar1()
+
+define i64 @f1() {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, bar1
+; CHECK: lgr %r2, %r5
+; CHECK: br %r14
+  %mret = call { i64, i64, i64, i64 } @bar1()
+  %ret = extractvalue { i64, i64, i64, i64 } %mret, 3
+  ret i64 %ret
+}
+
+; More than four integer return values use sret.
+declare { i64, i64, i64, i64, i64 } @bar2()
+
+define i64 @f2() {
+; CHECK-LABEL: f2:
+; CHECK: la %r2, 160(%r15)
+; CHECK: brasl %r14, bar2
+; CHECK: lg  %r2, 192(%r15)
+; CHECK: br %r14
+  %mret = call { i64, i64, i64, i64, i64 } @bar2()
+  %ret = extractvalue { i64, i64, i64, i64, i64 } %mret, 4
+  ret i64 %ret
+}
+
+; Up to four floating-point return values fit into GPRs.
+declare { double, double, double, double } @bar3()
+
+define double @f3() {
+; CHECK-LABEL: f3:
+; CHECK: brasl %r14, bar3
+; CHECK: ldr %f0, %f6
+; CHECK: br %r14
+  %mret = call { double, double, double, double } @bar3()
+  %ret = extractvalue { double, double, double, double } %mret, 3
+  ret double %ret
+}
+
+; More than four integer return values use sret.
+declare { double, double, double, double, double } @bar4()
+
+define double @f4() {
+; CHECK-LABEL: f4:
+; CHECK: la %r2, 160(%r15)
+; CHECK: brasl %r14, bar4
+; CHECK: ld  %f0, 192(%r15)
+; CHECK: br %r14
+  %mret = call { double, double, double, double, double } @bar4()
+  %ret = extractvalue { double, double, double, double, double } %mret, 4
+  ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-args-06.ll b/test/CodeGen/SystemZ/vec-args-06.ll
new file mode 100644
index 00000000000..b26131ca1d4
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-06.ll
@@ -0,0 +1,83 @@
+; Test multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Up to eight vector return values fit into VRs.
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+         <2 x double>, <2 x double>, <2 x double>, <2 x double> } @f1() {
+; CHECK-LABEL: f1:
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v24, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v26, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v28, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v30, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v25, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v27, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v29, 0([[TMP]])
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl %v31, 0([[TMP]])
+; CHECK: br %r14
+  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+        <2 x double>, <2 x double>, <2 x double>, <2 x double> }
+      { <2 x double> <double 1.0, double 1.1>,
+        <2 x double> <double 2.0, double 2.1>,
+        <2 x double> <double 3.0, double 3.1>,
+        <2 x double> <double 4.0, double 4.1>,
+        <2 x double> <double 5.0, double 5.1>,
+        <2 x double> <double 6.0, double 6.1>,
+        <2 x double> <double 7.0, double 7.1>,
+        <2 x double> <double 8.0, double 8.1> }
+}
+
+; More than eight vector return values use sret.
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+         <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+         <2 x double> } @f2() {
+; CHECK-LABEL: f2:
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 128(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 112(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 96(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 80(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 64(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 48(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 32(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 16(%r2)
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
+; CHECK: vst [[VTMP]], 0(%r2)
+; CHECK: br %r14
+  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+        <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+        <2 x double> }
+      { <2 x double> <double 1.0, double 1.1>,
+        <2 x double> <double 2.0, double 2.1>,
+        <2 x double> <double 3.0, double 3.1>,
+        <2 x double> <double 4.0, double 4.1>,
+        <2 x double> <double 5.0, double 5.1>,
+        <2 x double> <double 6.0, double 6.1>,
+        <2 x double> <double 7.0, double 7.1>,
+        <2 x double> <double 8.0, double 8.1>,
+        <2 x double> <double 9.0, double 9.1> }
+}
diff --git a/test/CodeGen/SystemZ/vec-args-07.ll b/test/CodeGen/SystemZ/vec-args-07.ll
new file mode 100644
index 00000000000..f0b5e6835cf
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-07.ll
@@ -0,0 +1,47 @@
+; Test calling functions with multiple return values (LLVM ABI extension)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Up to eight vector return values fit into VRs.
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+          <2 x double>, <2 x double>, <2 x double>, <2 x double> } @bar1()
+
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, bar1
+; CHECK: vlr %v24, %v31
+; CHECK: br %r14
+  %mret = call { <2 x double>, <2 x double>,
+                 <2 x double>, <2 x double>,
+                 <2 x double>, <2 x double>,
+                 <2 x double>, <2 x double> } @bar1()
+  %ret = extractvalue { <2 x double>, <2 x double>,
+                        <2 x double>, <2 x double>,
+                        <2 x double>, <2 x double>,
+                        <2 x double>, <2 x double> } %mret, 7
+  ret <2 x double> %ret
+}
+
+; More than eight vector return values use sret.
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+          <2 x double>, <2 x double>, <2 x double>, <2 x double>,
+          <2 x double> } @bar2()
+
+define <2 x double> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: la %r2, 160(%r15)
+; CHECK: brasl %r14, bar2
+; CHECK: vl  %v24, 288(%r15)
+; CHECK: br %r14
+  %mret = call { <2 x double>, <2 x double>,
+                 <2 x double>, <2 x double>,
+                 <2 x double>, <2 x double>,
+                 <2 x double>, <2 x double>,
+                 <2 x double> } @bar2()
+  %ret = extractvalue { <2 x double>, <2 x double>,
+                        <2 x double>, <2 x double>,
+                        <2 x double>, <2 x double>,
+                        <2 x double>, <2 x double>,
+                        <2 x double> } %mret, 8
+  ret <2 x double> %ret
+}
-- 
2.34.1