Unaligned stores should use the VMOVUPS opcode.

author Nadav Rotem <nrotem@apple.com>

Thu, 14 Mar 2013 23:49:44 +0000 (23:49 +0000)

committer Nadav Rotem <nrotem@apple.com>

Thu, 14 Mar 2013 23:49:44 +0000 (23:49 +0000)
author Nadav Rotem <nrotem@apple.com>
Thu, 14 Mar 2013 23:49:44 +0000 (23:49 +0000)
committer Nadav Rotem <nrotem@apple.com>
Thu, 14 Mar 2013 23:49:44 +0000 (23:49 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 097975275732261f4b569440e614eb7aa3980ea4..f22a6b29cd2c4757af46955a5c91e2a50345ed1e 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1009,7 +1009,7 @@ let Predicates = [HasAVX] in {
              (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
    def : Pat<(store (v8i16 (extract_subvector
                             (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
-            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
    def : Pat<(store (v16i8 (extract_subvector
                             (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
              (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll

index 77a7c4f945f105886a8afa4c1f86717d20ab7ee8..432852d47d306fe2f0c111b6638384ebc75c830e 100644 (file)
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -55,7 +55,7 @@ define void @storev16i16(<16 x i16> %a) nounwind {
  
  ; CHECK: storev16i16_01
  ; CHECK: vextractf128
-; CHECK: vmovaps  %xmm
+; CHECK: vmovups  %xmm
  define void @storev16i16_01(<16 x i16> %a) nounwind {
    store <16 x i16> %a, <16 x i16>* undef, align 4
    unreachable
diff --git a/test/CodeGen/X86/vec_align_i256.ll b/test/CodeGen/X86/vec_align_i256.ll

new file mode 100644 (file)

index 0000000..44ca5c5
--- /dev/null
+++ b/test/CodeGen/X86/vec_align_i256.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mcpu=corei7-avx | FileCheck %s 
+
+; Make sure that we do not generate a vmovaps here: the store is only 1-byte aligned, so the unaligned vmovups must be used.
+;CHECK: @foo
+;CHECK: xor
+;CHECK-NEXT: vmovups
+;CHECK-NEXT: ret
+define void @foo() {
+  store <16 x i16> zeroinitializer, <16 x i16>* undef, align 1
+  ret void
+}
author	Nadav Rotem <nrotem@apple.com>
	Thu, 14 Mar 2013 23:49:44 +0000 (23:49 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Thu, 14 Mar 2013 23:49:44 +0000 (23:49 +0000)
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
test/CodeGen/X86/avx-load-store.ll		patch \| blob \| history
test/CodeGen/X86/vec_align_i256.ll	[new file with mode: 0644]	patch \| blob