From: Bruno Cardoso Lopes Date: Tue, 9 Aug 2011 03:04:23 +0000 (+0000) Subject: Rename and tidy up tests X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b33ea564485b430508ab94cf085fd47d7f0e49b0;p=oota-llvm.git Rename and tidy up tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137103 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll deleted file mode 100644 index fe1472f4548..00000000000 --- a/test/CodeGen/X86/avx-128.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -@z = common global <4 x float> zeroinitializer, align 16 - -define void @zero() nounwind ssp { -entry: - ; CHECK: vxorps - ; CHECK: vmovaps - store <4 x float> zeroinitializer, <4 x float>* @z, align 16 - ret void -} - -define void @fpext() nounwind uwtable { -entry: - %f = alloca float, align 4 - %d = alloca double, align 8 - %tmp = load float* %f, align 4 - ; CHECK: vcvtss2sd - %conv = fpext float %tmp to double - store double %conv, double* %d, align 8 - ret void -} - -; CHECK: vcvtsi2sdq (% -define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp { -entry: - %tmp1 = load i64* %e, align 8 - %conv = sitofp i64 %tmp1 to double - ret double %conv -} - -; CHECK: vcvtsi2sd (% -define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp { -entry: - %tmp1 = load i32* %e, align 4 - %conv = sitofp i32 %tmp1 to double - ret double %conv -} - -; CHECK: vcvtsi2ss (% -define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp { -entry: - %tmp1 = load i32* %e, align 4 - %conv = sitofp i32 %tmp1 to float - ret float %conv -} - -; CHECK: vcvtsi2ssq (% -define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp { -entry: - %tmp1 = load i64* %e, align 8 - %conv = sitofp i64 %tmp1 to float - ret float %conv -} - -; CHECK: vsqrtss -define float @sqrtA(float %a) nounwind uwtable readnone ssp { -entry: - %conv1 = tail call float @sqrtf(float %a) nounwind readnone - ret float %conv1 -} - -declare double @sqrt(double) readnone - -; CHECK: vsqrtsd -define double @sqrtB(double %a) nounwind uwtable readnone ssp { -entry: - %call = tail call double @sqrt(double %a) nounwind readnone - ret double %call -} - -declare float @sqrtf(float) readnone diff --git a/test/CodeGen/X86/avx-256-arith.ll b/test/CodeGen/X86/avx-256-arith.ll deleted file mode 100644 index 5c512db0e2a..00000000000 --- a/test/CodeGen/X86/avx-256-arith.ll +++ /dev/null @@ -1,116 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; CHECK: vaddpd -define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %add.i = fadd <4 x double> %x, %y - ret <4 x double> %add.i -} - -; CHECK: vaddpd LCP{{.*}}(%rip) -define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { -entry: - %add.i = fadd <4 x double> %y, - ret <4 x double> %add.i -} - -; CHECK: vaddps -define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %add.i = fadd <8 x float> %x, %y - ret <8 x float> %add.i -} - -; CHECK: vaddps LCP{{.*}}(%rip) -define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp { -entry: - %add.i = fadd <8 x float> %y, - ret <8 x float> %add.i -} - -; CHECK: vsubpd -define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %sub.i = fsub <4 x double> %x, %y - ret <4 x double> %sub.i -} - -; CHECK: vsubpd (% -define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { -entry: - %tmp2 = load <4 x double>* %x, align 32 - %sub.i = fsub <4 x double> %y, %tmp2 - ret <4 x double> %sub.i -} - -; CHECK: vsubps -define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %sub.i = fsub <8 x float> %x, %y - ret <8 x float> %sub.i -} - -; CHECK: vsubps (% -define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { -entry: - %tmp2 = load <8 x float>* %x, align 32 - %sub.i = fsub <8 x float> %y, %tmp2 - ret <8 x float> %sub.i -} - -; CHECK: vmulpd -define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %mul.i = fmul <4 x double> %x, %y - ret <4 x double> %mul.i -} - -; CHECK: vmulpd LCP{{.*}}(%rip) -define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { -entry: - %mul.i = fmul <4 x double> %y, - ret <4 x double> %mul.i -} - -; CHECK: vmulps -define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %mul.i = fmul <8 x float> %x, %y - ret <8 x float> %mul.i -} - -; CHECK: vmulps LCP{{.*}}(%rip) -define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp { -entry: - %mul.i = fmul <8 x float> %y, - ret <8 x float> %mul.i -} - -; CHECK: vdivpd -define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %div.i = fdiv <4 x double> %x, %y - ret <4 x double> %div.i -} - -; CHECK: vdivpd LCP{{.*}}(%rip) -define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { -entry: - %div.i = fdiv <4 x double> %y, - ret <4 x double> %div.i -} - -; CHECK: vdivps -define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %div.i = fdiv <8 x float> %x, %y - ret <8 x float> %div.i -} - -; CHECK: vdivps LCP{{.*}}(%rip) -define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp { -entry: - %div.i = fdiv <8 x float> %y, - ret <8 x float> %div.i -} - diff --git a/test/CodeGen/X86/avx-256-cmp.ll b/test/CodeGen/X86/avx-256-cmp.ll deleted file mode 100644 index 3323a5bd9cc..00000000000 --- a/test/CodeGen/X86/avx-256-cmp.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; CHECK: vcmpltps %ymm -; CHECK-NOT: vucomiss -define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone { - %bincmp = fcmp olt <8 x float> %a, %b - %s = sext <8 x i1> %bincmp to <8 x i32> - ret <8 x i32> %s -} - -; CHECK: vcmpltpd %ymm -; CHECK-NOT: vucomisd -define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone { - %bincmp = fcmp olt <4 x double> %a, %b - %s = sext <4 x i1> %bincmp to <4 x i64> - ret <4 x i64> %s -} - diff --git a/test/CodeGen/X86/avx-256-cvt.ll b/test/CodeGen/X86/avx-256-cvt.ll deleted file mode 100644 index d97327906ca..00000000000 --- a/test/CodeGen/X86/avx-256-cvt.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; CHECK: vcvtdq2ps %ymm -define <8 x float> @funcA(<8 x i32> %a) nounwind { - %b = sitofp <8 x i32> %a to <8 x float> - ret <8 x float> %b -} - -; CHECK: vcvttps2dq %ymm -define <8 x i32> @funcB(<8 x float> %a) nounwind { - %b = fptosi <8 x float> %a to <8 x i32> - ret <8 x i32> %b -} - -; CHECK: vcvtpd2psy %ymm -; CHECK-NEXT: vcvtpd2psy %ymm -; CHECK-NEXT: vinsertf128 $1 -define <8 x float> @funcC(<8 x double> %b) nounwind { - %a = fptrunc <8 x double> %b to <8 x float> - ret <8 x float> %a -} diff --git a/test/CodeGen/X86/avx-256-logic.ll b/test/CodeGen/X86/avx-256-logic.ll deleted file mode 100644 index d9e5d081fb1..00000000000 --- a/test/CodeGen/X86/avx-256-logic.ll +++ /dev/null @@ -1,161 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; CHECK: vandpd -define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %x to <4 x i64> - %1 = bitcast <4 x double> %y to <4 x i64> - %and.i = and <4 x i64> %0, %1 - %2 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %2 -} - -; CHECK: vandpd LCP{{.*}}(%rip) -define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %y to <4 x i64> - %and.i = and <4 x i64> %0, - %1 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %1 -} - -; CHECK: vandps -define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %x to <8 x i32> - %1 = bitcast <8 x float> %y to <8 x i32> - %and.i = and <8 x i32> %0, %1 - %2 = bitcast <8 x i32> %and.i to <8 x float> - ret <8 x float> %2 -} - -; CHECK: vandps LCP{{.*}}(%rip) -define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %y to <8 x i32> - %and.i = and <8 x i32> %0, - %1 = bitcast <8 x i32> %and.i to <8 x float> - ret <8 x float> %1 -} - -; CHECK: vxorpd -define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %x to <4 x i64> - %1 = bitcast <4 x double> %y to <4 x i64> - %xor.i = xor <4 x i64> %0, %1 - %2 = bitcast <4 x i64> %xor.i to <4 x double> - ret <4 x double> %2 -} - -; CHECK: vxorpd LCP{{.*}}(%rip) -define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %y to <4 x i64> - %xor.i = xor <4 x i64> %0, - %1 = bitcast <4 x i64> %xor.i to <4 x double> - ret <4 x double> %1 -} - -; CHECK: vxorps -define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %x to <8 x i32> - %1 = bitcast <8 x float> %y to <8 x i32> - %xor.i = xor <8 x i32> %0, %1 - %2 = bitcast <8 x i32> %xor.i to <8 x float> - ret <8 x float> %2 -} - -; CHECK: vxorps LCP{{.*}}(%rip) -define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %y to <8 x i32> - %xor.i = xor <8 x i32> %0, - %1 = bitcast <8 x i32> %xor.i to <8 x float> - ret <8 x float> %1 -} - -; CHECK: vorpd -define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %x to <4 x i64> - %1 = bitcast <4 x double> %y to <4 x i64> - %or.i = or <4 x i64> %0, %1 - %2 = bitcast <4 x i64> %or.i to <4 x double> - ret <4 x double> %2 -} - -; CHECK: vorpd LCP{{.*}}(%rip) -define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %y to <4 x i64> - %or.i = or <4 x i64> %0, - %1 = bitcast <4 x i64> %or.i to <4 x double> - ret <4 x double> %1 -} - -; CHECK: vorps -define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %x to <8 x i32> - %1 = bitcast <8 x float> %y to <8 x i32> - %or.i = or <8 x i32> %0, %1 - %2 = bitcast <8 x i32> %or.i to <8 x float> - ret <8 x float> %2 -} - -; CHECK: vorps LCP{{.*}}(%rip) -define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %y to <8 x i32> - %or.i = or <8 x i32> %0, - %1 = bitcast <8 x i32> %or.i to <8 x float> - ret <8 x float> %1 -} - -; CHECK: vandnpd -define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x double> %x to <4 x i64> - %neg.i = xor <4 x i64> %0, - %1 = bitcast <4 x double> %y to <4 x i64> - %and.i = and <4 x i64> %1, %neg.i - %2 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %2 -} - -; CHECK: vandnpd (% -define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { -entry: - %tmp2 = load <4 x double>* %x, align 32 - %0 = bitcast <4 x double> %y to <4 x i64> - %neg.i = xor <4 x i64> %0, - %1 = bitcast <4 x double> %tmp2 to <4 x i64> - %and.i = and <4 x i64> %1, %neg.i - %2 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %2 -} - -; CHECK: vandnps -define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <8 x float> %x to <8 x i32> - %neg.i = xor <8 x i32> %0, - %1 = bitcast <8 x float> %y to <8 x i32> - %and.i = and <8 x i32> %1, %neg.i - %2 = bitcast <8 x i32> %and.i to <8 x float> - ret <8 x float> %2 -} - -; CHECK: vandnps (% -define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { -entry: - %tmp2 = load <8 x float>* %x, align 32 - %0 = bitcast <8 x float> %y to <8 x i32> - %neg.i = xor <8 x i32> %0, - %1 = bitcast <8 x float> %tmp2 to <8 x i32> - %and.i = and <8 x i32> %1, %neg.i - %2 = bitcast <8 x i32> %and.i to <8 x float> - ret <8 x float> %2 -} diff --git a/test/CodeGen/X86/avx-256-movdup.ll b/test/CodeGen/X86/avx-256-movdup.ll deleted file mode 100644 index 42d84def98a..00000000000 --- a/test/CodeGen/X86/avx-256-movdup.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; CHECK: vmovsldup -define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> - ret <8 x float> %shuffle.i -} - -; CHECK: vmovshdup -define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> - ret <8 x float> %shuffle.i -} - -; CHECK: vmovsldup -define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x i64> %src to <8 x float> - %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> - %1 = bitcast <8 x float> %shuffle.i to <4 x i64> - ret <4 x i64> %1 -} - -; CHECK: vmovshdup -define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp { -entry: - %0 = bitcast <4 x i64> %src to <8 x float> - %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> - %1 = bitcast <8 x float> %shuffle.i to <4 x i64> - ret <4 x i64> %1 -} - diff --git a/test/CodeGen/X86/avx-256-splat.ll b/test/CodeGen/X86/avx-256-splat.ll deleted file mode 100644 index 36d469417f9..00000000000 --- a/test/CodeGen/X86/avx-256-splat.ll +++ /dev/null @@ -1,79 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd - -; CHECK: vextractf128 $0 -; CHECK-NEXT: punpcklbw -; CHECK-NEXT: punpckhbw -; CHECK-NEXT: vinsertf128 $1 -; CHECK-NEXT: vpermilps $85 -define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { -entry: - %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> - ret <32 x i8> %shuffle -} - -; CHECK: vextractf128 $0 -; CHECK-NEXT: punpckhwd -; CHECK-NEXT: vinsertf128 $1 -; CHECK-NEXT: vpermilps $85 -define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { -entry: - %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> - ret <16 x i16> %shuffle -} - -; CHECK: vmovd -; CHECK-NEXT: movlhps -; CHECK-NEXT: vinsertf128 $1 -define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { -entry: - %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 - %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 - %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 - %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 - ret <4 x i64> %vecinit6.i -} - -; CHECK: vshufpd -; CHECK-NEXT: vinsertf128 $1 -define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { -entry: - %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 - %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 - %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 - %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 - ret <4 x double> %vecinit6.i -} - -; Test this simple opt: -; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> -; To: -; shuffle (vload ptr)), undef, <1, 1, 1, 1> -; CHECK: vmovaps -; CHECK-NEXT: vpextrd -define void @funcE() nounwind { -allocas: - %udx495 = alloca [18 x [18 x float]], align 32 - br label %for_test505.preheader - -for_test505.preheader: ; preds = %for_test505.preheader, %allocas - br i1 undef, label %for_exit499, label %for_test505.preheader - -for_exit499: ; preds = %for_test505.preheader - br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247 - -load.i1247: ; preds = %for_exit499 - %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1 - %ptr.i1237 = bitcast float* %ptr1227 to i32* - %val.i1238 = load i32* %ptr.i1237, align 4 - %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6 - %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7 - %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float> - br label %__load_and_broadcast_32.exit1249 - -__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 - %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] - ret void -} - diff --git a/test/CodeGen/X86/avx-256-unpack.ll b/test/CodeGen/X86/avx-256-unpack.ll deleted file mode 100644 index 4e906ee1555..00000000000 --- a/test/CodeGen/X86/avx-256-unpack.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -; CHECK: vunpckhps -define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> - ret <8 x float> %shuffle.i -} - -; CHECK: vunpckhpd -define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> - ret <4 x double> %shuffle.i -} - -; CHECK: vunpcklps -define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> - ret <8 x float> %shuffle.i -} - -; CHECK: vunpcklpd -define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> - ret <4 x double> %shuffle.i -} - -; CHECK-NOT: vunpcklps %ymm -define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> - ret <8 x float> %shuffle.i -} - -; CHECK-NOT: vunpcklpd %ymm -define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> - ret <4 x double> %shuffle.i -} - -; CHECK-NOT: vunpckhps %ymm -define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> - ret <8 x float> %shuffle.i -} - -; CHECK-NOT: vunpckhpd %ymm -define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { -entry: - %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> - ret <4 x double> %shuffle.i -} - diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll deleted file mode 100644 index 337f1429ee6..00000000000 --- a/test/CodeGen/X86/avx-256.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -@x = common global <8 x float> zeroinitializer, align 32 -@y = common global <4 x double> zeroinitializer, align 32 - -define void @zero() nounwind ssp { -entry: - ; CHECK: vxorps - ; CHECK: vmovaps - ; CHECK: vmovaps - store <8 x float> zeroinitializer, <8 x float>* @x, align 32 - store <4 x double> zeroinitializer, <4 x double>* @y, align 32 - ret void -} - -; CHECK: vpcmpeqd -; CHECK: vinsertf128 $1 -define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind { -allocas: - %ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>* - store <8 x float> , <8 x -float>* %ptr2vec615, align 32 - ret void -} - -; CHECK: vpcmpeqd -; CHECK: vinsertf128 $1 -define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind { -allocas: - %ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>* - store <8 x i32> , <8 x i32>* %ptr2vec615, align 32 - ret void -} diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll new file mode 100644 index 00000000000..553e8acda97 --- /dev/null +++ b/test/CodeGen/X86/avx-arith.ll @@ -0,0 +1,133 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vaddpd +define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <4 x double> %x, %y + ret <4 x double> %add.i +} + +; CHECK: vaddpd LCP{{.*}}(%rip) +define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <4 x double> %y, + ret <4 x double> %add.i +} + +; CHECK: vaddps +define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <8 x float> %x, %y + ret <8 x float> %add.i +} + +; CHECK: vaddps LCP{{.*}}(%rip) +define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %add.i = fadd <8 x float> %y, + ret <8 x float> %add.i +} + +; CHECK: vsubpd +define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %sub.i = fsub <4 x double> %x, %y + ret <4 x double> %sub.i +} + +; CHECK: vsubpd (% +define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <4 x double>* %x, align 32 + %sub.i = fsub <4 x double> %y, %tmp2 + ret <4 x double> %sub.i +} + +; CHECK: vsubps +define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %sub.i = fsub <8 x float> %x, %y + ret <8 x float> %sub.i +} + +; CHECK: vsubps (% +define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <8 x float>* %x, align 32 + %sub.i = fsub <8 x float> %y, %tmp2 + ret <8 x float> %sub.i +} + +; CHECK: vmulpd +define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <4 x double> %x, %y + ret <4 x double> %mul.i +} + +; CHECK: vmulpd LCP{{.*}}(%rip) +define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <4 x double> %y, + ret <4 x double> %mul.i +} + +; CHECK: vmulps +define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <8 x float> %x, %y + ret <8 x float> %mul.i +} + +; CHECK: vmulps LCP{{.*}}(%rip) +define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %mul.i = fmul <8 x float> %y, + ret <8 x float> %mul.i +} + +; CHECK: vdivpd +define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <4 x double> %x, %y + ret <4 x double> %div.i +} + +; CHECK: vdivpd LCP{{.*}}(%rip) +define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <4 x double> %y, + ret <4 x double> %div.i +} + +; CHECK: vdivps +define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <8 x float> %x, %y + ret <8 x float> %div.i +} + +; CHECK: vdivps LCP{{.*}}(%rip) +define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %div.i = fdiv <8 x float> %y, + ret <8 x float> %div.i +} + +; CHECK: vsqrtss +define float @sqrtA(float %a) nounwind uwtable readnone ssp { +entry: + %conv1 = tail call float @sqrtf(float %a) nounwind readnone + ret float %conv1 +} + +declare double @sqrt(double) readnone + +; CHECK: vsqrtsd +define double @sqrtB(double %a) nounwind uwtable readnone ssp { +entry: + %call = tail call double @sqrt(double %a) nounwind readnone + ret double %call +} + +declare float @sqrtf(float) readnone diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll new file mode 100644 index 00000000000..8a24a58194b --- /dev/null +++ b/test/CodeGen/X86/avx-basic.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +@x = common global <8 x float> zeroinitializer, align 32 +@y = common global <4 x double> zeroinitializer, align 32 +@z = common global <4 x float> zeroinitializer, align 16 + +define void @zero128() nounwind ssp { +entry: + ; CHECK: vxorps + ; CHECK: vmovaps + store <4 x float> zeroinitializer, <4 x float>* @z, align 16 + ret void +} + +define void @zero256() nounwind ssp { +entry: + ; CHECK: vxorps + ; CHECK: vmovaps + ; CHECK: vmovaps + store <8 x float> zeroinitializer, <8 x float>* @x, align 32 + store <4 x double> zeroinitializer, <4 x double>* @y, align 32 + ret void +} + +; CHECK: vpcmpeqd +; CHECK: vinsertf128 $1 +define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind { +allocas: + %ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>* + store <8 x float> , <8 x +float>* %ptr2vec615, align 32 + ret void +} + +; CHECK: vpcmpeqd +; CHECK: vinsertf128 $1 +define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind { +allocas: + %ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>* + store <8 x i32> , <8 x i32>* %ptr2vec615, align 32 + ret void +} diff --git a/test/CodeGen/X86/avx-cmp-fp.ll b/test/CodeGen/X86/avx-cmp-fp.ll deleted file mode 100644 index b10d9aeceab..00000000000 --- a/test/CodeGen/X86/avx-cmp-fp.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s - -declare void @scale() nounwind uwtable - -; CHECK: vucomisd -define void @render() nounwind uwtable { -entry: - br i1 undef, label %for.cond5, label %for.end52 - -for.cond5: - %or.cond = and i1 undef, false - br i1 %or.cond, label %for.body33, label %for.cond5 - -for.cond30: - br i1 false, label %for.body33, label %for.cond5 - -for.body33: - %tobool = fcmp une double undef, 0.000000e+00 - br i1 %tobool, label %if.then, label %for.cond30 - -if.then: - call void @scale() - br label %for.cond30 - -for.end52: - ret void -} - diff --git a/test/CodeGen/X86/avx-cmp.ll b/test/CodeGen/X86/avx-cmp.ll new file mode 100644 index 00000000000..c90e2251183 --- /dev/null +++ b/test/CodeGen/X86/avx-cmp.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vcmpltps %ymm +; CHECK-NOT: vucomiss +define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone { + %bincmp = fcmp olt <8 x float> %a, %b + %s = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %s +} + +; CHECK: vcmpltpd %ymm +; CHECK-NOT: vucomisd +define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone { + %bincmp = fcmp olt <4 x double> %a, %b + %s = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %s +} + +declare void @scale() nounwind uwtable + +; CHECK: vucomisd +define void @render() nounwind uwtable { +entry: + br i1 undef, label %for.cond5, label %for.end52 + +for.cond5: + %or.cond = and i1 undef, false + br i1 %or.cond, label %for.body33, label %for.cond5 + +for.cond30: + br i1 false, label %for.body33, label %for.cond5 + +for.body33: + %tobool = fcmp une double undef, 0.000000e+00 + br i1 %tobool, label %if.then, label %for.cond30 + +if.then: + call void @scale() + br label %for.cond30 + +for.end52: + ret void +} + diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll new file mode 100644 index 00000000000..6da47af5ab5 --- /dev/null +++ b/test/CodeGen/X86/avx-cvt.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vcvtdq2ps %ymm +define <8 x float> @sitofp00(<8 x i32> %a) nounwind { + %b = sitofp <8 x i32> %a to <8 x float> + ret <8 x float> %b +} + +; CHECK: vcvttps2dq %ymm +define <8 x i32> @fptosi00(<8 x float> %a) nounwind { + %b = fptosi <8 x float> %a to <8 x i32> + ret <8 x i32> %b +} + +; CHECK: vcvtpd2psy %ymm +; CHECK-NEXT: vcvtpd2psy %ymm +; CHECK-NEXT: vinsertf128 $1 +define <8 x float> @fptrunc00(<8 x double> %b) nounwind { + %a = fptrunc <8 x double> %b to <8 x float> + ret <8 x float> %a +} + +; CHECK: vcvtsi2sdq (% +define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to double + ret double %conv +} + +; CHECK: vcvtsi2sd (% +define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to double + ret double %conv +} + +; CHECK: vcvtsi2ss (% +define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i32* %e, align 4 + %conv = sitofp i32 %tmp1 to float + ret float %conv +} + +; CHECK: vcvtsi2ssq (% +define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp { +entry: + %tmp1 = load i64* %e, align 8 + %conv = sitofp i64 %tmp1 to float + ret float %conv +} + +; CHECK: vcvtss2sd +define void @fpext() nounwind uwtable { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load float* %f, align 4 + %conv = fpext float %tmp to double + store double %conv, double* %d, align 8 + ret void +} + diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll new file mode 100644 index 00000000000..d9e5d081fb1 --- /dev/null +++ b/test/CodeGen/X86/avx-logic.ll @@ -0,0 +1,161 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vandpd +define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandpd LCP{{.*}}(%rip) +define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %0, + %1 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vandps +define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandps LCP{{.*}}(%rip) +define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %0, + %1 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vxorpd +define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %xor.i = xor <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %xor.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vxorpd LCP{{.*}}(%rip) +define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %xor.i = xor <4 x i64> %0, + %1 = bitcast <4 x i64> %xor.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vxorps +define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %xor.i = xor <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %xor.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vxorps LCP{{.*}}(%rip) +define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %xor.i = xor <8 x i32> %0, + %1 = bitcast <8 x i32> %xor.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vorpd +define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %or.i = or <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %or.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vorpd LCP{{.*}}(%rip) +define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %or.i = or <4 x i64> %0, + %1 = bitcast <4 x i64> %or.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vorps +define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %or.i = or <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %or.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vorps LCP{{.*}}(%rip) +define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %or.i = or <8 x i32> %0, + %1 = bitcast <8 x i32> %or.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vandnpd +define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %neg.i = xor <4 x i64> %0, + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnpd (% +define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <4 x double>* %x, align 32 + %0 = bitcast <4 x double> %y to <4 x i64> + %neg.i = xor <4 x i64> %0, + %1 = bitcast <4 x double> %tmp2 to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnps +define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %neg.i = xor <8 x i32> %0, + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandnps (% +define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <8 x float>* %x, align 32 + %0 = bitcast <8 x float> %y to <8 x i32> + %neg.i = xor <8 x i32> %0, + %1 = bitcast <8 x float> %tmp2 to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} diff --git a/test/CodeGen/X86/avx-movdup.ll b/test/CodeGen/X86/avx-movdup.ll new file mode 100644 index 00000000000..42d84def98a --- /dev/null +++ b/test/CodeGen/X86/avx-movdup.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vmovsldup +define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK: vmovshdup +define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK: vmovsldup +define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x i64> %src to <8 x float> + %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> + %1 = bitcast <8 x float> %shuffle.i to <4 x i64> + ret <4 x i64> %1 +} + +; CHECK: vmovshdup +define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x i64> %src to <8 x float> + %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> + %1 = bitcast <8 x float> %shuffle.i to <4 x i64> + ret <4 x i64> %1 +} + diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll new file mode 100644 index 00000000000..36d469417f9 --- /dev/null +++ b/test/CodeGen/X86/avx-splat.ll @@ -0,0 +1,79 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd + +; CHECK: vextractf128 $0 +; CHECK-NEXT: punpcklbw +; CHECK-NEXT: punpckhbw +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> + ret <32 x i8> %shuffle +} + +; CHECK: vextractf128 $0 +; CHECK-NEXT: punpckhwd +; CHECK-NEXT: vinsertf128 $1 +; CHECK-NEXT: vpermilps $85 +define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> + ret <16 x i16> %shuffle +} + +; CHECK: vmovd +; CHECK-NEXT: movlhps +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { +entry: + %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 + %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 + %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 + %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 + ret <4 x i64> %vecinit6.i +} + +; CHECK: vshufpd +; CHECK-NEXT: vinsertf128 $1 +define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { +entry: + %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 + %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 + %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 + %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 + ret <4 x double> %vecinit6.i +} + +; Test this simple opt: +; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> +; To: +; shuffle (vload ptr)), undef, <1, 1, 1, 1> +; CHECK: vmovaps +; CHECK-NEXT: vpextrd +define void @funcE() nounwind { +allocas: + %udx495 = alloca [18 x [18 x float]], align 32 + br label %for_test505.preheader + +for_test505.preheader: ; preds = %for_test505.preheader, %allocas + br i1 undef, label %for_exit499, label %for_test505.preheader + +for_exit499: ; preds = %for_test505.preheader + br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247 + +load.i1247: ; preds = %for_exit499 + %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1 + %ptr.i1237 = bitcast float* %ptr1227 to i32* + %val.i1238 = load i32* %ptr.i1237, align 4 + %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6 + %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7 + %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float> + br label %__load_and_broadcast_32.exit1249 + +__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 + %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] + ret void +} + diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll new file mode 100644 index 00000000000..4e906ee1555 --- /dev/null +++ b/test/CodeGen/X86/avx-unpack.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vunpckhps +define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK: vunpckhpd +define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> + ret <4 x double> %shuffle.i +} + +; CHECK: vunpcklps +define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK: vunpcklpd +define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> + ret <4 x double> %shuffle.i +} + +; CHECK-NOT: vunpcklps %ymm +define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK-NOT: vunpcklpd %ymm +define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> + ret <4 x double> %shuffle.i +} + +; CHECK-NOT: vunpckhps %ymm +define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK-NOT: vunpckhpd %ymm +define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> + ret <4 x double> %shuffle.i +} +