Add another missing pattern. llvm-gcc likes f64 but clang likes i64 so it was generat...

author Evan Cheng <evan.cheng@apple.com>

Tue, 29 Nov 2011 22:48:34 +0000 (22:48 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Tue, 29 Nov 2011 22:48:34 +0000 (22:48 +0000)
author Evan Cheng <evan.cheng@apple.com>
Tue, 29 Nov 2011 22:48:34 +0000 (22:48 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Tue, 29 Nov 2011 22:48:34 +0000 (22:48 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 22a7fa478509fab78a9bdc096065767905e18832..0dca0007d247a3a9dbe20e7760e8b93e342f92a5 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1150,6 +1150,9 @@ let Predicates = [HasAVX] in {
    def : Pat<(X86Movlhps VR128:$src1,
                   (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
              (VMOVHPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86Movlhps VR128:$src1,
+                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+            (VMOVHPSrm VR128:$src1, addr:$src2)>;
    def : Pat<(X86Movlhps VR128:$src1,
                   (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
              (VMOVHPSrm VR128:$src1, addr:$src2)>;
@@ -1183,6 +1186,9 @@ let Predicates = [HasSSE1] in {
    def : Pat<(X86Movlhps VR128:$src1,
                   (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
              (MOVHPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86Movlhps VR128:$src1,
+                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+            (MOVHPSrm VR128:$src1, addr:$src2)>;
    def : Pat<(X86Movlhps VR128:$src1,
                   (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
              (MOVHPSrm VR128:$src1, addr:$src2)>;
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll

index 66da013665f6831d8f8314e20d3aef0c03a00208..96ef883c4e1e44ebea412b449482b92ace501ce2 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -46,7 +46,7 @@ entry:
  
  ; rdar://10119696
  ; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
+define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
  entry:
    ; CHECK: movlps  (%{{rdi|rdx}}), %xmm0
    %u110.i = load double* %y, align 1
@@ -56,3 +56,22 @@ entry:
    ret <4 x float> %shuffle.i
  }
  
+define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
+entry:
+; CHECK: loadhpi2
+; CHECK: movhps (
+; CHECK-NOT: movlhps
+  %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
+  %idx.ext = sext i32 %s to i64
+  %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
+  %add.ptr.val = load <1 x i64>* %add.ptr, align 1
+  %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
+  %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
+  %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
+  %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
+  %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x float> %shuffle1.i5
+}
author	Evan Cheng <evan.cheng@apple.com>
	Tue, 29 Nov 2011 22:48:34 +0000 (22:48 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Tue, 29 Nov 2011 22:48:34 +0000 (22:48 +0000)
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/vec_shuffle-38.ll		patch | blob | history