From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Tue, 9 Aug 2011 03:04:23 +0000 (+0000)
Subject: Rename and tidy up tests
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=b33ea564485b430508ab94cf085fd47d7f0e49b0;p=oota-llvm.git

Rename and tidy up tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137103 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
deleted file mode 100644
index fe1472f4548..00000000000
--- a/test/CodeGen/X86/avx-128.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-@z = common global <4 x float> zeroinitializer, align 16
-
-define void @zero() nounwind ssp {
-entry:
-  ; CHECK: vxorps
-  ; CHECK: vmovaps
-  store <4 x float> zeroinitializer, <4 x float>* @z, align 16
-  ret void
-}
-
-define void @fpext() nounwind uwtable {
-entry:
-  %f = alloca float, align 4
-  %d = alloca double, align 8
-  %tmp = load float* %f, align 4
-  ; CHECK: vcvtss2sd
-  %conv = fpext float %tmp to double
-  store double %conv, double* %d, align 8
-  ret void
-}
-
-; CHECK: vcvtsi2sdq (%
-define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
-entry:
-  %tmp1 = load i64* %e, align 8
-  %conv = sitofp i64 %tmp1 to double
-  ret double %conv
-}
-
-; CHECK: vcvtsi2sd (%
-define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
-entry:
-  %tmp1 = load i32* %e, align 4
-  %conv = sitofp i32 %tmp1 to double
-  ret double %conv
-}
-
-; CHECK: vcvtsi2ss (%
-define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
-entry:
-  %tmp1 = load i32* %e, align 4
-  %conv = sitofp i32 %tmp1 to float
-  ret float %conv
-}
-
-; CHECK: vcvtsi2ssq  (%
-define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
-entry:
-  %tmp1 = load i64* %e, align 8
-  %conv = sitofp i64 %tmp1 to float
-  ret float %conv
-}
-
-; CHECK: vsqrtss
-define float @sqrtA(float %a) nounwind uwtable readnone ssp {
-entry:
-  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
-  ret float %conv1
-}
-
-declare double @sqrt(double) readnone
-
-; CHECK: vsqrtsd
-define double @sqrtB(double %a) nounwind uwtable readnone ssp {
-entry:
-  %call = tail call double @sqrt(double %a) nounwind readnone
-  ret double %call
-}
-
-declare float @sqrtf(float) readnone
diff --git a/test/CodeGen/X86/avx-256-arith.ll b/test/CodeGen/X86/avx-256-arith.ll
deleted file mode 100644
index 5c512db0e2a..00000000000
--- a/test/CodeGen/X86/avx-256-arith.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vaddpd
-define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %add.i = fadd <4 x double> %x, %y
-  ret <4 x double> %add.i
-}
-
-; CHECK: vaddpd LCP{{.*}}(%rip)
-define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
-entry:
-  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
-  ret <4 x double> %add.i
-}
-
-; CHECK: vaddps
-define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %add.i = fadd <8 x float> %x, %y
-  ret <8 x float> %add.i
-}
-
-; CHECK: vaddps LCP{{.*}}(%rip)
-define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
-entry:
-  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
-  ret <8 x float> %add.i
-}
-
-; CHECK: vsubpd
-define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %sub.i = fsub <4 x double> %x, %y
-  ret <4 x double> %sub.i
-}
-
-; CHECK: vsubpd (%
-define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
-entry:
-  %tmp2 = load <4 x double>* %x, align 32
-  %sub.i = fsub <4 x double> %y, %tmp2
-  ret <4 x double> %sub.i
-}
-
-; CHECK: vsubps
-define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %sub.i = fsub <8 x float> %x, %y
-  ret <8 x float> %sub.i
-}
-
-; CHECK: vsubps (%
-define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
-entry:
-  %tmp2 = load <8 x float>* %x, align 32
-  %sub.i = fsub <8 x float> %y, %tmp2
-  ret <8 x float> %sub.i
-}
-
-; CHECK: vmulpd
-define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %mul.i = fmul <4 x double> %x, %y
-  ret <4 x double> %mul.i
-}
-
-; CHECK: vmulpd LCP{{.*}}(%rip)
-define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
-entry:
-  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
-  ret <4 x double> %mul.i
-}
-
-; CHECK: vmulps
-define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %mul.i = fmul <8 x float> %x, %y
-  ret <8 x float> %mul.i
-}
-
-; CHECK: vmulps LCP{{.*}}(%rip)
-define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
-entry:
-  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
-  ret <8 x float> %mul.i
-}
-
-; CHECK: vdivpd
-define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %div.i = fdiv <4 x double> %x, %y
-  ret <4 x double> %div.i
-}
-
-; CHECK: vdivpd LCP{{.*}}(%rip)
-define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
-entry:
-  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
-  ret <4 x double> %div.i
-}
-
-; CHECK: vdivps
-define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %div.i = fdiv <8 x float> %x, %y
-  ret <8 x float> %div.i
-}
-
-; CHECK: vdivps LCP{{.*}}(%rip)
-define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
-entry:
-  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
-  ret <8 x float> %div.i
-}
-
diff --git a/test/CodeGen/X86/avx-256-cmp.ll b/test/CodeGen/X86/avx-256-cmp.ll
deleted file mode 100644
index 3323a5bd9cc..00000000000
--- a/test/CodeGen/X86/avx-256-cmp.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vcmpltps %ymm
-; CHECK-NOT: vucomiss
-define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone {
-  %bincmp = fcmp olt <8 x float> %a, %b
-  %s = sext <8 x i1> %bincmp to <8 x i32>
-  ret <8 x i32> %s
-}
-
-; CHECK: vcmpltpd %ymm
-; CHECK-NOT: vucomisd
-define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone {
-  %bincmp = fcmp olt <4 x double> %a, %b
-  %s = sext <4 x i1> %bincmp to <4 x i64>
-  ret <4 x i64> %s
-}
-
diff --git a/test/CodeGen/X86/avx-256-cvt.ll b/test/CodeGen/X86/avx-256-cvt.ll
deleted file mode 100644
index d97327906ca..00000000000
--- a/test/CodeGen/X86/avx-256-cvt.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vcvtdq2ps %ymm
-define <8 x float> @funcA(<8 x i32> %a) nounwind {
-  %b = sitofp <8 x i32> %a to <8 x float>
-  ret <8 x float> %b
-}
-
-; CHECK: vcvttps2dq %ymm
-define <8 x i32> @funcB(<8 x float> %a) nounwind {
-  %b = fptosi <8 x float> %a to <8 x i32>
-  ret <8 x i32> %b
-}
-
-; CHECK: vcvtpd2psy %ymm
-; CHECK-NEXT: vcvtpd2psy %ymm
-; CHECK-NEXT: vinsertf128 $1
-define <8 x float> @funcC(<8 x double> %b) nounwind {
-  %a = fptrunc <8 x double> %b to <8 x float>
-  ret <8 x float> %a
-}
diff --git a/test/CodeGen/X86/avx-256-logic.ll b/test/CodeGen/X86/avx-256-logic.ll
deleted file mode 100644
index d9e5d081fb1..00000000000
--- a/test/CodeGen/X86/avx-256-logic.ll
+++ /dev/null
@@ -1,161 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vandpd
-define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %x to <4 x i64>
-  %1 = bitcast <4 x double> %y to <4 x i64>
-  %and.i = and <4 x i64> %0, %1
-  %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
-}
-
-; CHECK: vandpd LCP{{.*}}(%rip)
-define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %y to <4 x i64>
-  %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
-  %1 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %1
-}
-
-; CHECK: vandps
-define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %x to <8 x i32>
-  %1 = bitcast <8 x float> %y to <8 x i32>
-  %and.i = and <8 x i32> %0, %1
-  %2 = bitcast <8 x i32> %and.i to <8 x float>
-  ret <8 x float> %2
-}
-
-; CHECK: vandps LCP{{.*}}(%rip)
-define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %y to <8 x i32>
-  %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
-  %1 = bitcast <8 x i32> %and.i to <8 x float>
-  ret <8 x float> %1
-}
-
-; CHECK: vxorpd
-define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %x to <4 x i64>
-  %1 = bitcast <4 x double> %y to <4 x i64>
-  %xor.i = xor <4 x i64> %0, %1
-  %2 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %2
-}
-
-; CHECK: vxorpd LCP{{.*}}(%rip)
-define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %y to <4 x i64>
-  %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
-  %1 = bitcast <4 x i64> %xor.i to <4 x double>
-  ret <4 x double> %1
-}
-
-; CHECK: vxorps
-define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %x to <8 x i32>
-  %1 = bitcast <8 x float> %y to <8 x i32>
-  %xor.i = xor <8 x i32> %0, %1
-  %2 = bitcast <8 x i32> %xor.i to <8 x float>
-  ret <8 x float> %2
-}
-
-; CHECK: vxorps LCP{{.*}}(%rip)
-define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %y to <8 x i32>
-  %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
-  %1 = bitcast <8 x i32> %xor.i to <8 x float>
-  ret <8 x float> %1
-}
-
-; CHECK: vorpd
-define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %x to <4 x i64>
-  %1 = bitcast <4 x double> %y to <4 x i64>
-  %or.i = or <4 x i64> %0, %1
-  %2 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %2
-}
-
-; CHECK: vorpd LCP{{.*}}(%rip)
-define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %y to <4 x i64>
-  %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
-  %1 = bitcast <4 x i64> %or.i to <4 x double>
-  ret <4 x double> %1
-}
-
-; CHECK: vorps
-define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %x to <8 x i32>
-  %1 = bitcast <8 x float> %y to <8 x i32>
-  %or.i = or <8 x i32> %0, %1
-  %2 = bitcast <8 x i32> %or.i to <8 x float>
-  ret <8 x float> %2
-}
-
-; CHECK: vorps LCP{{.*}}(%rip)
-define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %y to <8 x i32>
-  %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
-  %1 = bitcast <8 x i32> %or.i to <8 x float>
-  ret <8 x float> %1
-}
-
-; CHECK: vandnpd
-define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x double> %x to <4 x i64>
-  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
-  %1 = bitcast <4 x double> %y to <4 x i64>
-  %and.i = and <4 x i64> %1, %neg.i
-  %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
-}
-
-; CHECK: vandnpd (%
-define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
-entry:
-  %tmp2 = load <4 x double>* %x, align 32
-  %0 = bitcast <4 x double> %y to <4 x i64>
-  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
-  %1 = bitcast <4 x double> %tmp2 to <4 x i64>
-  %and.i = and <4 x i64> %1, %neg.i
-  %2 = bitcast <4 x i64> %and.i to <4 x double>
-  ret <4 x double> %2
-}
-
-; CHECK: vandnps
-define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <8 x float> %x to <8 x i32>
-  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-  %1 = bitcast <8 x float> %y to <8 x i32>
-  %and.i = and <8 x i32> %1, %neg.i
-  %2 = bitcast <8 x i32> %and.i to <8 x float>
-  ret <8 x float> %2
-}
-
-; CHECK: vandnps (%
-define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
-entry:
-  %tmp2 = load <8 x float>* %x, align 32
-  %0 = bitcast <8 x float> %y to <8 x i32>
-  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-  %1 = bitcast <8 x float> %tmp2 to <8 x i32>
-  %and.i = and <8 x i32> %1, %neg.i
-  %2 = bitcast <8 x i32> %and.i to <8 x float>
-  ret <8 x float> %2
-}
diff --git a/test/CodeGen/X86/avx-256-movdup.ll b/test/CodeGen/X86/avx-256-movdup.ll
deleted file mode 100644
index 42d84def98a..00000000000
--- a/test/CodeGen/X86/avx-256-movdup.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vmovsldup
-define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK: vmovshdup
-define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK: vmovsldup
-define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x i64> %src to <8 x float>
-  %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-  %1 = bitcast <8 x float> %shuffle.i to <4 x i64>
-  ret <4 x i64> %1
-}
-
-; CHECK: vmovshdup
-define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x i64> %src to <8 x float>
-  %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
-  %1 = bitcast <8 x float> %shuffle.i to <4 x i64>
-  ret <4 x i64> %1
-}
-
diff --git a/test/CodeGen/X86/avx-256-splat.ll b/test/CodeGen/X86/avx-256-splat.ll
deleted file mode 100644
index 36d469417f9..00000000000
--- a/test/CodeGen/X86/avx-256-splat.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
-
-; CHECK: vextractf128 $0
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
-define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <32 x i8> %shuffle
-}
-
-; CHECK: vextractf128 $0
-; CHECK-NEXT: punpckhwd
-; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
-define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  ret <16 x i16> %shuffle
-}
-
-; CHECK: vmovd
-; CHECK-NEXT: movlhps
-; CHECK-NEXT: vinsertf128 $1
-define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
-entry:
-  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
-  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
-  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
-  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
-  ret <4 x i64> %vecinit6.i
-}
-
-; CHECK: vshufpd
-; CHECK-NEXT: vinsertf128 $1
-define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
-entry:
-  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
-  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
-  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
-  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
-  ret <4 x double> %vecinit6.i
-}
-
-; Test this simple opt:
-;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
-; To:
-;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovaps
-; CHECK-NEXT: vpextrd
-define void @funcE() nounwind {
-allocas:
-  %udx495 = alloca [18 x [18 x float]], align 32
-  br label %for_test505.preheader
-
-for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
-  br i1 undef, label %for_exit499, label %for_test505.preheader
-
-for_exit499:                                      ; preds = %for_test505.preheader
-  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
-
-load.i1247:                                       ; preds = %for_exit499
-  %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
-  %ptr.i1237 = bitcast float* %ptr1227 to i32*
-  %val.i1238 = load i32* %ptr.i1237, align 4
-  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
-  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
-  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
-  br label %__load_and_broadcast_32.exit1249
-
-__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
-  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
-  ret void
-}
-
diff --git a/test/CodeGen/X86/avx-256-unpack.ll b/test/CodeGen/X86/avx-256-unpack.ll
deleted file mode 100644
index 4e906ee1555..00000000000
--- a/test/CodeGen/X86/avx-256-unpack.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vunpckhps
-define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK: vunpckhpd
-define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-  ret <4 x double> %shuffle.i
-}
-
-; CHECK: vunpcklps
-define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK: vunpcklpd
-define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-  ret <4 x double> %shuffle.i
-}
-
-; CHECK-NOT: vunpcklps %ymm
-define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK-NOT: vunpcklpd %ymm
-define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-  ret <4 x double> %shuffle.i
-}
-
-; CHECK-NOT: vunpckhps %ymm
-define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK-NOT: vunpckhpd %ymm
-define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-  ret <4 x double> %shuffle.i
-}
-
diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll
deleted file mode 100644
index 337f1429ee6..00000000000
--- a/test/CodeGen/X86/avx-256.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-@x = common global <8 x float> zeroinitializer, align 32
-@y = common global <4 x double> zeroinitializer, align 32
-
-define void @zero() nounwind ssp {
-entry:
-  ; CHECK: vxorps
-  ; CHECK: vmovaps
-  ; CHECK: vmovaps
-  store <8 x float> zeroinitializer, <8 x float>* @x, align 32
-  store <4 x double> zeroinitializer, <4 x double>* @y, align 32
-  ret void
-}
-
-; CHECK: vpcmpeqd
-; CHECK: vinsertf128 $1
-define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
-allocas:
-  %ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>*
-  store <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float
-0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float
-0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <8 x
-float>* %ptr2vec615, align 32
-  ret void
-}
-
-; CHECK: vpcmpeqd
-; CHECK: vinsertf128 $1
-define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
-allocas:
-  %ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>*
-  store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32
-  ret void
-}
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
new file mode 100644
index 00000000000..553e8acda97
--- /dev/null
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vaddpd
+define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %add.i = fadd <4 x double> %x, %y
+  ret <4 x double> %add.i
+}
+
+; CHECK: vaddpd LCP{{.*}}(%rip)
+define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+  ret <4 x double> %add.i
+}
+
+; CHECK: vaddps
+define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %add.i = fadd <8 x float> %x, %y
+  ret <8 x float> %add.i
+}
+
+; CHECK: vaddps LCP{{.*}}(%rip)
+define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+  ret <8 x float> %add.i
+}
+
+; CHECK: vsubpd
+define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %sub.i = fsub <4 x double> %x, %y
+  ret <4 x double> %sub.i
+}
+
+; CHECK: vsubpd (%
+define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+  %tmp2 = load <4 x double>* %x, align 32
+  %sub.i = fsub <4 x double> %y, %tmp2
+  ret <4 x double> %sub.i
+}
+
+; CHECK: vsubps
+define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %sub.i = fsub <8 x float> %x, %y
+  ret <8 x float> %sub.i
+}
+
+; CHECK: vsubps (%
+define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+  %tmp2 = load <8 x float>* %x, align 32
+  %sub.i = fsub <8 x float> %y, %tmp2
+  ret <8 x float> %sub.i
+}
+
+; CHECK: vmulpd
+define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %mul.i = fmul <4 x double> %x, %y
+  ret <4 x double> %mul.i
+}
+
+; CHECK: vmulpd LCP{{.*}}(%rip)
+define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+  ret <4 x double> %mul.i
+}
+
+; CHECK: vmulps
+define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %mul.i = fmul <8 x float> %x, %y
+  ret <8 x float> %mul.i
+}
+
+; CHECK: vmulps LCP{{.*}}(%rip)
+define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+  ret <8 x float> %mul.i
+}
+
+; CHECK: vdivpd
+define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %div.i = fdiv <4 x double> %x, %y
+  ret <4 x double> %div.i
+}
+
+; CHECK: vdivpd LCP{{.*}}(%rip)
+define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+  ret <4 x double> %div.i
+}
+
+; CHECK: vdivps
+define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %div.i = fdiv <8 x float> %x, %y
+  ret <8 x float> %div.i
+}
+
+; CHECK: vdivps LCP{{.*}}(%rip)
+define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+  ret <8 x float> %div.i
+}
+
+; CHECK: vsqrtss
+define float @sqrtA(float %a) nounwind uwtable readnone ssp {
+entry:
+  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
+  ret float %conv1
+}
+
+declare double @sqrt(double) readnone
+
+; CHECK: vsqrtsd
+define double @sqrtB(double %a) nounwind uwtable readnone ssp {
+entry:
+  %call = tail call double @sqrt(double %a) nounwind readnone
+  ret double %call
+}
+
+declare float @sqrtf(float) readnone
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
new file mode 100644
index 00000000000..8a24a58194b
--- /dev/null
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+@x = common global <8 x float> zeroinitializer, align 32
+@y = common global <4 x double> zeroinitializer, align 32
+@z = common global <4 x float> zeroinitializer, align 16
+
+define void @zero128() nounwind ssp {
+entry:
+  ; CHECK: vxorps
+  ; CHECK: vmovaps
+  store <4 x float> zeroinitializer, <4 x float>* @z, align 16
+  ret void
+}
+
+define void @zero256() nounwind ssp {
+entry:
+  ; CHECK: vxorps
+  ; CHECK: vmovaps
+  ; CHECK: vmovaps
+  store <8 x float> zeroinitializer, <8 x float>* @x, align 32
+  store <4 x double> zeroinitializer, <4 x double>* @y, align 32
+  ret void
+}
+
+; CHECK: vpcmpeqd
+; CHECK: vinsertf128 $1
+define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
+allocas:
+  %ptr2vec615 = bitcast [0 x float]* %RET to <8 x float>*
+  store <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float
+0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float
+0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <8 x
+float>* %ptr2vec615, align 32
+  ret void
+}
+
+; CHECK: vpcmpeqd
+; CHECK: vinsertf128 $1
+define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
+allocas:
+  %ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>*
+  store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-cmp-fp.ll b/test/CodeGen/X86/avx-cmp-fp.ll
deleted file mode 100644
index b10d9aeceab..00000000000
--- a/test/CodeGen/X86/avx-cmp-fp.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-declare void @scale() nounwind uwtable
-
-; CHECK: vucomisd
-define void @render() nounwind uwtable {
-entry:
-  br i1 undef, label %for.cond5, label %for.end52
-
-for.cond5:
-  %or.cond = and i1 undef, false
-  br i1 %or.cond, label %for.body33, label %for.cond5
-
-for.cond30:
-  br i1 false, label %for.body33, label %for.cond5
-
-for.body33:
-  %tobool = fcmp une double undef, 0.000000e+00
-  br i1 %tobool, label %if.then, label %for.cond30
-
-if.then:
-  call void @scale()
-  br label %for.cond30
-
-for.end52:
-  ret void
-}
-
diff --git a/test/CodeGen/X86/avx-cmp.ll b/test/CodeGen/X86/avx-cmp.ll
new file mode 100644
index 00000000000..c90e2251183
--- /dev/null
+++ b/test/CodeGen/X86/avx-cmp.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vcmpltps %ymm
+; CHECK-NOT: vucomiss
+define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone {
+  %bincmp = fcmp olt <8 x float> %a, %b
+  %s = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %s
+}
+
+; CHECK: vcmpltpd %ymm
+; CHECK-NOT: vucomisd
+define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone {
+  %bincmp = fcmp olt <4 x double> %a, %b
+  %s = sext <4 x i1> %bincmp to <4 x i64>
+  ret <4 x i64> %s
+}
+
+declare void @scale() nounwind uwtable
+
+; CHECK: vucomisd
+define void @render() nounwind uwtable {
+entry:
+  br i1 undef, label %for.cond5, label %for.end52
+
+for.cond5:
+  %or.cond = and i1 undef, false
+  br i1 %or.cond, label %for.body33, label %for.cond5
+
+for.cond30:
+  br i1 false, label %for.body33, label %for.cond5
+
+for.body33:
+  %tobool = fcmp une double undef, 0.000000e+00
+  br i1 %tobool, label %if.then, label %for.cond30
+
+if.then:
+  call void @scale()
+  br label %for.cond30
+
+for.end52:
+  ret void
+}
+
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
new file mode 100644
index 00000000000..6da47af5ab5
--- /dev/null
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vcvtdq2ps %ymm
+define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
+  %b = sitofp <8 x i32> %a to <8 x float>
+  ret <8 x float> %b
+}
+
+; CHECK: vcvttps2dq %ymm
+define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
+  %b = fptosi <8 x float> %a to <8 x i32>
+  ret <8 x i32> %b
+}
+
+; CHECK: vcvtpd2psy %ymm
+; CHECK-NEXT: vcvtpd2psy %ymm
+; CHECK-NEXT: vinsertf128 $1
+define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
+  %a = fptrunc <8 x double> %b to <8 x float>
+  ret <8 x float> %a
+}
+
+; CHECK: vcvtsi2sdq (%
+define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+  %tmp1 = load i64* %e, align 8
+  %conv = sitofp i64 %tmp1 to double
+  ret double %conv
+}
+
+; CHECK: vcvtsi2sd (%
+define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+  %tmp1 = load i32* %e, align 4
+  %conv = sitofp i32 %tmp1 to double
+  ret double %conv
+}
+
+; CHECK: vcvtsi2ss (%
+define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+  %tmp1 = load i32* %e, align 4
+  %conv = sitofp i32 %tmp1 to float
+  ret float %conv
+}
+
+; CHECK: vcvtsi2ssq  (%
+define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+  %tmp1 = load i64* %e, align 8
+  %conv = sitofp i64 %tmp1 to float
+  ret float %conv
+}
+
+; CHECK: vcvtss2sd
+define void @fpext() nounwind uwtable {
+entry:
+  %f = alloca float, align 4
+  %d = alloca double, align 8
+  %tmp = load float* %f, align 4
+  %conv = fpext float %tmp to double
+  store double %conv, double* %d, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
new file mode 100644
index 00000000000..d9e5d081fb1
--- /dev/null
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -0,0 +1,161 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vandpd
+define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %x to <4 x i64>
+  %1 = bitcast <4 x double> %y to <4 x i64>
+  %and.i = and <4 x i64> %0, %1
+  %2 = bitcast <4 x i64> %and.i to <4 x double>
+  ret <4 x double> %2
+}
+
+; CHECK: vandpd LCP{{.*}}(%rip)
+define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %y to <4 x i64>
+  %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
+  %1 = bitcast <4 x i64> %and.i to <4 x double>
+  ret <4 x double> %1
+}
+
+; CHECK: vandps
+define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %x to <8 x i32>
+  %1 = bitcast <8 x float> %y to <8 x i32>
+  %and.i = and <8 x i32> %0, %1
+  %2 = bitcast <8 x i32> %and.i to <8 x float>
+  ret <8 x float> %2
+}
+
+; CHECK: vandps LCP{{.*}}(%rip)
+define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %y to <8 x i32>
+  %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
+  %1 = bitcast <8 x i32> %and.i to <8 x float>
+  ret <8 x float> %1
+}
+
+; CHECK: vxorpd
+define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %x to <4 x i64>
+  %1 = bitcast <4 x double> %y to <4 x i64>
+  %xor.i = xor <4 x i64> %0, %1
+  %2 = bitcast <4 x i64> %xor.i to <4 x double>
+  ret <4 x double> %2
+}
+
+; CHECK: vxorpd LCP{{.*}}(%rip)
+define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %y to <4 x i64>
+  %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
+  %1 = bitcast <4 x i64> %xor.i to <4 x double>
+  ret <4 x double> %1
+}
+
+; CHECK: vxorps
+define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %x to <8 x i32>
+  %1 = bitcast <8 x float> %y to <8 x i32>
+  %xor.i = xor <8 x i32> %0, %1
+  %2 = bitcast <8 x i32> %xor.i to <8 x float>
+  ret <8 x float> %2
+}
+
+; CHECK: vxorps LCP{{.*}}(%rip)
+define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %y to <8 x i32>
+  %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
+  %1 = bitcast <8 x i32> %xor.i to <8 x float>
+  ret <8 x float> %1
+}
+
+; CHECK: vorpd
+define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %x to <4 x i64>
+  %1 = bitcast <4 x double> %y to <4 x i64>
+  %or.i = or <4 x i64> %0, %1
+  %2 = bitcast <4 x i64> %or.i to <4 x double>
+  ret <4 x double> %2
+}
+
+; CHECK: vorpd LCP{{.*}}(%rip)
+define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %y to <4 x i64>
+  %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
+  %1 = bitcast <4 x i64> %or.i to <4 x double>
+  ret <4 x double> %1
+}
+
+; CHECK: vorps
+define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %x to <8 x i32>
+  %1 = bitcast <8 x float> %y to <8 x i32>
+  %or.i = or <8 x i32> %0, %1
+  %2 = bitcast <8 x i32> %or.i to <8 x float>
+  ret <8 x float> %2
+}
+
+; CHECK: vorps LCP{{.*}}(%rip)
+define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %y to <8 x i32>
+  %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
+  %1 = bitcast <8 x i32> %or.i to <8 x float>
+  ret <8 x float> %1
+}
+
+; CHECK: vandnpd
+define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x double> %x to <4 x i64>
+  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %1 = bitcast <4 x double> %y to <4 x i64>
+  %and.i = and <4 x i64> %1, %neg.i
+  %2 = bitcast <4 x i64> %and.i to <4 x double>
+  ret <4 x double> %2
+}
+
+; CHECK: vandnpd (%
+define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+  %tmp2 = load <4 x double>* %x, align 32
+  %0 = bitcast <4 x double> %y to <4 x i64>
+  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %1 = bitcast <4 x double> %tmp2 to <4 x i64>
+  %and.i = and <4 x i64> %1, %neg.i
+  %2 = bitcast <4 x i64> %and.i to <4 x double>
+  ret <4 x double> %2
+}
+
+; CHECK: vandnps
+define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <8 x float> %x to <8 x i32>
+  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <8 x float> %y to <8 x i32>
+  %and.i = and <8 x i32> %1, %neg.i
+  %2 = bitcast <8 x i32> %and.i to <8 x float>
+  ret <8 x float> %2
+}
+
+; CHECK: vandnps (%
+define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+  %tmp2 = load <8 x float>* %x, align 32
+  %0 = bitcast <8 x float> %y to <8 x i32>
+  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <8 x float> %tmp2 to <8 x i32>
+  %and.i = and <8 x i32> %1, %neg.i
+  %2 = bitcast <8 x i32> %and.i to <8 x float>
+  ret <8 x float> %2
+}
diff --git a/test/CodeGen/X86/avx-movdup.ll b/test/CodeGen/X86/avx-movdup.ll
new file mode 100644
index 00000000000..42d84def98a
--- /dev/null
+++ b/test/CodeGen/X86/avx-movdup.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vmovsldup
+define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK: vmovshdup
+define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK: vmovsldup
+define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x i64> %src to <8 x float>
+  %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  %1 = bitcast <8 x float> %shuffle.i to <4 x i64>
+  ret <4 x i64> %1
+}
+
+; CHECK: vmovshdup
+define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp {
+entry:
+  %0 = bitcast <4 x i64> %src to <8 x float>
+  %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  %1 = bitcast <8 x float> %shuffle.i to <4 x i64>
+  ret <4 x i64> %1
+}
+
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
new file mode 100644
index 00000000000..36d469417f9
--- /dev/null
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
+
+; CHECK: vextractf128 $0
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: vinsertf128 $1
+; CHECK-NEXT: vpermilps $85
+define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <32 x i8> %shuffle
+}
+
+; CHECK: vextractf128 $0
+; CHECK-NEXT: punpckhwd
+; CHECK-NEXT: vinsertf128 $1
+; CHECK-NEXT: vpermilps $85
+define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <16 x i16> %shuffle
+}
+
+; CHECK: vmovd
+; CHECK-NEXT: movlhps
+; CHECK-NEXT: vinsertf128 $1
+define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
+entry:
+  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
+  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
+  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
+  ret <4 x i64> %vecinit6.i
+}
+
+; CHECK: vshufpd
+; CHECK-NEXT: vinsertf128 $1
+define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
+entry:
+  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
+  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
+  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
+  ret <4 x double> %vecinit6.i
+}
+
+; Test this simple opt:
+;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+; To:
+;   shuffle (vload ptr)), undef, <1, 1, 1, 1>
+; CHECK: vmovaps
+; CHECK-NEXT: vpextrd
+define void @funcE() nounwind {
+allocas:
+  %udx495 = alloca [18 x [18 x float]], align 32
+  br label %for_test505.preheader
+
+for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
+  br i1 undef, label %for_exit499, label %for_test505.preheader
+
+for_exit499:                                      ; preds = %for_test505.preheader
+  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
+
+load.i1247:                                       ; preds = %for_exit499
+  %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
+  %ptr.i1237 = bitcast float* %ptr1227 to i32*
+  %val.i1238 = load i32* %ptr.i1237, align 4
+  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
+  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
+  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
+  br label %__load_and_broadcast_32.exit1249
+
+__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
+  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
+  ret void
+}
+
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
new file mode 100644
index 00000000000..4e906ee1555
--- /dev/null
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vunpckhps
+define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK: vunpckhpd
+define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x double> %shuffle.i
+}
+
+; CHECK: vunpcklps
+define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK: vunpcklpd
+define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x double> %shuffle.i
+}
+
+; CHECK-NOT: vunpcklps %ymm
+define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK-NOT: vunpcklpd %ymm
+define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x double> %shuffle.i
+}
+
+; CHECK-NOT: vunpckhps %ymm
+define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x float> %shuffle.i
+}
+
+; CHECK-NOT: vunpckhpd %ymm
+define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x double> %shuffle.i
+}
+