// X86 C Calling Convention
//===----------------------------------------------------------------------===//
+/// CC_X86_32_Vector_Common - In all X86-32 calling conventions, extra vector
+/// values are spilled on the stack.
+def CC_X86_32_Vector_Common : CallingConv<[
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // 512-bit AVX 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToStack<64, 64>>
+]>;
+
+// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
+// vector registers
+def CC_X86_32_Vector_Standard : CallingConv<[
+ // SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2]>>>,
+
+ // AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasFp256()",
+ CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,
+
+ // AVX 512-bit vector arguments are passed in ZMM registers.
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
+
+ CCDelegateTo<CC_X86_32_Vector_Common>
+]>;
+
+// CC_X86_32_Vector_Darwin - The first 4 vector arguments are passed in
+// vector registers.
+def CC_X86_32_Vector_Darwin : CallingConv<[
+ // SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasFp256()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
+ // AVX 512-bit vector arguments are passed in ZMM registers.
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
+
+ CCDelegateTo<CC_X86_32_Vector_Common>
+]>;
+
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
-/// values are spilled on the stack, and the first 4 vector values go in XMM
-/// regs.
+/// values are spilled on the stack.
def CC_X86_32_Common : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
CCIfType<[v32i1], CCPromoteToType<v32i8>>,
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
- // The first 4 SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
-
- // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasFp256()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
-
- // The first 4 AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
- CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
-
- // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
- // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCAssignToStack<32, 32>>,
-
- // 512-bit AVX 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
- CCAssignToStack<64, 64>>,
-
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
- CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
+
+ // Darwin passes vectors in a form that differs from the i386 psABI
+ CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_X86_32_Vector_Darwin>>,
+
+ // Otherwise, drop to 'normal' X86-32 CC
+ CCDelegateTo<CC_X86_32_Vector_Standard>
+]>;
def CC_X86_32_C : CallingConv<[
// Promote i1/i8/i16 arguments to i32.
--- /dev/null
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=DARWIN
+; RUN: llc < %s -mtriple=i686-pc-linux -mattr=+avx512f | FileCheck %s --check-prefix=LINUX
+
+; CHECK-LABEL: test_sse:
+; DARWIN-DAG: vpaddd %xmm1, %xmm0, %xmm0
+; DARWIN-DAG: vpaddd %xmm3, %xmm2, %xmm1
+; DARWIN: vpaddd %xmm1, %xmm0, %xmm0
+; LINUX-DAG: vpaddd %xmm1, %xmm0, %xmm0
+; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %xmm2, %xmm1
+; LINUX: vpaddd %xmm1, %xmm0, %xmm0
+define <4 x i32> @test_sse(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind {
+ %r0 = add <4 x i32> %a, %b
+ %r1 = add <4 x i32> %c, %d
+ %ret = add <4 x i32> %r0, %r1
+ ret <4 x i32> %ret
+}
+
+; CHECK-LABEL: test_avx:
+; DARWIN-DAG: vpaddd %ymm1, %ymm0, %ymm0
+; DARWIN-DAG: vpaddd %ymm3, %ymm2, %ymm1
+; DARWIN: vpaddd %ymm1, %ymm0, %ymm0
+; LINUX-DAG: vpaddd %ymm1, %ymm0, %ymm0
+; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %ymm2, %ymm1
+; LINUX: vpaddd %ymm1, %ymm0, %ymm0
+define <8 x i32> @test_avx(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) nounwind {
+ %r0 = add <8 x i32> %a, %b
+ %r1 = add <8 x i32> %c, %d
+ %ret = add <8 x i32> %r0, %r1
+ ret <8 x i32> %ret
+}
+
+; CHECK-LABEL: test_avx512:
+; DARWIN-DAG: vpaddd %zmm1, %zmm0, %zmm0
+; DARWIN-DAG: vpaddd %zmm3, %zmm2, %zmm1
+; DARWIN: vpaddd %zmm1, %zmm0, %zmm0
+; LINUX-DAG: vpaddd %zmm1, %zmm0, %zmm0
+; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %zmm2, %zmm1
+; LINUX: vpaddd %zmm1, %zmm0, %zmm0
+define <16 x i32> @test_avx512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) nounwind {
+ %r0 = add <16 x i32> %a, %b
+ %r1 = add <16 x i32> %c, %d
+ %ret = add <16 x i32> %r0, %r1
+ ret <16 x i32> %ret
+}