{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
{ X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ // TODO: Add the AVX versions of MOVLPSmr
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
}
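For context: each row of this table lists equivalent opcodes for the SSE float, double, and integer execution domains, so the new MOVLPS row lets the execution-domain fix pass rewrite a low-64-bit store into whichever domain avoids a crossing penalty. A minimal sketch of how such a table is consulted (hypothetical opcode values and helper name, not LLVM's actual X86InstrInfo API):

    #include <cstdint>

    enum Domain { FloatDom = 0, DoubleDom = 1, IntDom = 2 };

    // Hypothetical stand-ins for X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr.
    static const uint16_t ReplaceableTable[][3] = {
        {0x100, 0x101, 0x102}, // MOVLPSmr, MOVLPDmr, MOVPQI2QImr
        {0x200, 0x201, 0x202}, // MOVUPSrm, MOVUPDrm, MOVDQUrm
    };

    // Return the equivalent opcode for Opcode in domain D, or 0 if the
    // instruction has no domain-equivalent forms.
    static uint16_t getEquivalentOpcode(uint16_t Opcode, Domain D) {
      for (const auto &Row : ReplaceableTable)
        for (uint16_t Candidate : Row)
          if (Candidate == Opcode)
            return Row[D];
      return 0;
    }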
; CHECK-LABEL: zero_test
-; CHECK: pxor %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK: ret
define void @zero_test() {
; CHECK: pmovzxwd
%A27 = load <4 x i16>, <4 x i16>* %in, align 4
%A28 = add <4 x i16> %A27, %A27
-; CHECK: movlpd
+; CHECK: movq
store <4 x i16> %A28, <4 x i16>* %in, align 4
ret void
; CHECK: ret
BB:
store <2 x i32> zeroinitializer, <2 x i32>* %ptr
ret void
-;CHECK: movlpd
+;CHECK: movlps
;CHECK: ret
}
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+; Verify that we select the correct version of the instruction that stores the low 64 bits
+; of a 128-bit vector. We want to avoid int/fp domain-crossing penalties, so ignore the
+; bitcast ops and choose:
+;
+; movlps for floats
+; movlpd for doubles
+; movq for integers
+
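A minimal sketch of the selection policy these tests verify (hypothetical helper, not LLVM's actual lowering code): the store opcode follows the domain of the value being stored, with bitcasts looked through.

    // Map the bitcast-stripped source domain to the low-64-bit store opcode.
    enum class ValueDomain { Float, Double, Int };

    static const char *low64StoreFor(ValueDomain D) {
      switch (D) {
      case ValueDomain::Float:  return "movlps";
      case ValueDomain::Double: return "movlpd";
      case ValueDomain::Int:    return "movq";
      }
      return nullptr;
    }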
+define void @store_floats(<4 x float> %x, i64* %p) {
+; SSE-LABEL: store_floats:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm0, %xmm0
+; SSE-NEXT: movlps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_floats:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm0, %xmm0, %xmm0
+; FIXME: The AVX version is not handled correctly; per the policy above, this
+; should be vmovlps rather than vmovq.
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <4 x float> %x, %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x float> %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_double(<2 x double> %x, i64* %p) {
+; SSE-LABEL: store_double:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm0, %xmm0
+; SSE-NEXT: movlpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_double:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovlpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 0
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_int(<4 x i32> %x, <2 x float>* %p) {
+; SSE-LABEL: store_int:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm0, %xmm0
+; SSE-NEXT: movq %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_int:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = add <4 x i32> %x, %x
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x i32> %b to <2 x float>
+ store <2 x float> %c, <2 x float>* %p
+ ret void
+}
+
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
- ; CHECK: movq
+ ; CHECK: movlps
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
}
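For completeness, the test above relies on this intrinsic declaration (normally at the bottom of the test file; signature per the SSE2 intrinsic definition):

    declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind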
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
; X86-32: ## BB#0:
; X86-32: movd {{[0-9]+}}(%esp), %xmm0
; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; X86-32-NEXT: movlpd %xmm0, (%esp)
+; X86-32-NEXT: movq %xmm0, (%esp)
; X86-32-NEXT: movq (%esp), %mm0
; X86-32-NEXT: addl $12, %esp
; X86-32-NEXT: retl
define void @test1() {
;CHECK-LABEL: @test1
-;CHECK: xorpd
+;CHECK: xorps
store <1 x i64> zeroinitializer, <1 x i64>* @M1
store <2 x i32> zeroinitializer, <2 x i32>* @M2
ret void
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X32-NEXT: movlpd %xmm0, (%eax)
+; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test0:
; X32-NEXT: .cfi_def_cfa_offset 24
; X32-NEXT: Ltmp2:
; X32-NEXT: .cfi_offset %edi, -8
-; X32-NEXT: xorpd %xmm0, %xmm0
-; X32-NEXT: movlpd %xmm0, (%esp)
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movlps %xmm0, (%esp)
; X32-NEXT: movq (%esp), %mm0
; X32-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-NEXT: movlpd %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: movq {{[0-9]+}}(%esp), %mm1
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: maskmovq %mm1, %mm0
;
; X64-LABEL: test1:
; X64: ## BB#0: ## %entry
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; CHECK: movl
; CHECK: paddw
-; CHECK: movlpd
+; CHECK: movq
+
+; FIXME: If this test cares about scheduling, why isn't the instruction order
+; being checked?
; The scheduler produces a different instruction order on Atom.
; ATOM: movl
; ATOM: paddw
-; ATOM: movlpd
+; ATOM: movq
; bitcast a v4i16 to v2i32
; CHECK-NEXT: psraw $8
; CHECK-NEXT: psraw $2
; CHECK-NEXT: pshufb
-; CHECK-NEXT: movlpd
+; CHECK-NEXT: movq
;
; FIXME: We shouldn't require both a movd and an insert.
; CHECK-WIDE: %forbody
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
; CHECK: movl
-; CHECK: movlpd
+; CHECK: movq
; bitcast an i64 to v2i32
define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33]
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
store <8 x i8> %v, <8 x i8>* %p, align 8